Fix: sched instrumentation on stable RT kernels
[lttng-modules.git] / instrumentation / events / lttng-module / sched.h
1 #undef TRACE_SYSTEM
2 #define TRACE_SYSTEM sched
3
4 #if !defined(LTTNG_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
5 #define LTTNG_TRACE_SCHED_H
6
7 #include <probes/lttng-tracepoint-event.h>
8 #include <linux/sched.h>
9 #include <linux/pid_namespace.h>
10 #include <linux/binfmts.h>
11 #include <linux/version.h>
12 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,9,0))
13 #include <linux/sched/rt.h>
14 #endif
15
16 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0))
17 #define lttng_proc_inum ns.inum
18 #else
19 #define lttng_proc_inum proc_inum
20 #endif
21
22 #define LTTNG_MAX_PID_NS_LEVEL 32
23
#ifndef _TRACE_SCHED_DEF_
#define _TRACE_SCHED_DEF_

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0))

/*
 * Compute the prev_state value recorded by the sched_switch event.
 *
 * Kernels >= 4.4 pass a 'preempt' flag to the sched_switch tracepoint,
 * so the caller tells us directly whether the switch was involuntary —
 * no need to inspect the preempt count ourselves.
 */
static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
{
#ifdef CONFIG_SCHED_DEBUG
	BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */
	/*
	 * Preemption ignores task state, therefore preempted tasks are always RUNNING
	 * (we will not have dequeued if state != RUNNING).
	 */
	return preempt ? TASK_RUNNING | TASK_STATE_MAX : p->state;
}

#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0))

/*
 * Kernels 3.19 to < 4.4: no 'preempt' tracepoint argument; detect an
 * in-progress preemption from the current CPU's preempt_count().
 * TASK_STATE_MAX is OR'd in so trace consumers can distinguish a
 * preempted-but-runnable task from a plainly running one.
 */
static inline long __trace_sched_switch_state(struct task_struct *p)
{
	long state = p->state;

#ifdef CONFIG_PREEMPT
#ifdef CONFIG_SCHED_DEBUG
	BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */
	/*
	 * For all intents and purposes a preempted task is a running task.
	 */
	if (preempt_count() & PREEMPT_ACTIVE)
		state = TASK_RUNNING | TASK_STATE_MAX;
#endif /* CONFIG_PREEMPT */

	return state;
}

#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3,13,0))

/*
 * Kernels 3.13 to < 3.19: read the preempt count through the
 * task_preempt_count() accessor available in this range.
 */
static inline long __trace_sched_switch_state(struct task_struct *p)
{
	long state = p->state;

#ifdef CONFIG_PREEMPT
	/*
	 * For all intents and purposes a preempted task is a running task.
	 */
	if (task_preempt_count(p) & PREEMPT_ACTIVE)
		state = TASK_RUNNING | TASK_STATE_MAX;
#endif

	return state;
}

#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0))

/*
 * Kernels 3.2 to < 3.13: the preempt count lives in thread_info, so
 * read it directly from there.
 */
static inline long __trace_sched_switch_state(struct task_struct *p)
{
	long state = p->state;

#ifdef CONFIG_PREEMPT
	/*
	 * For all intents and purposes a preempted task is a running task.
	 */
	if (task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)
		state = TASK_RUNNING | TASK_STATE_MAX;
#endif

	return state;
}

#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35))

/*
 * Kernels 2.6.35 to < 3.2: TASK_STATE_MAX is not used here, so a
 * preempted task is simply reported as TASK_RUNNING.
 */
static inline long __trace_sched_switch_state(struct task_struct *p)
{
	long state = p->state;

#ifdef CONFIG_PREEMPT
	/*
	 * For all intents and purposes a preempted task is a running task.
	 */
	if (task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)
		state = TASK_RUNNING;
#endif

	return state;
}

#endif

#endif /* _TRACE_SCHED_DEF_ */
115
/*
 * Tracepoint for calling kthread_stop, performed to end a kthread.
 * Records the target kthread's command name and tid.
 */
LTTNG_TRACEPOINT_EVENT(sched_kthread_stop,

	TP_PROTO(struct task_struct *t),

	TP_ARGS(t),

	TP_FIELDS(
		ctf_array_text(char, comm, t->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, t->pid)
	)
)
130
/*
 * Tracepoint for the return value of the kthread stopping.
 * Captures only the integer result passed back by kthread_stop().
 */
LTTNG_TRACEPOINT_EVENT(sched_kthread_stop_ret,

	TP_PROTO(int ret),

	TP_ARGS(ret),

	TP_FIELDS(
		ctf_integer(int, ret)
	)
)
144
/*
 * Tracepoint class for waking up a task.
 *
 * Upstream commit removing the 'success' argument from the wakeup
 * tracepoints was merged in 4.3, and was also backported to several
 * stable RT kernel branches — hence the LTTNG_RT_KERNEL_RANGE checks
 * below selecting the success-less prototype on those kernels too.
 */
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0) || \
	LTTNG_RT_KERNEL_RANGE(4,1,10,11, 4,2,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,18,27,26, 3,19,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,14,61,63, 3,15,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,12,54,73, 3,13,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,10,97,106, 3,11,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,4,110,139, 3,5,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,2,77,111, 3,3,0,0))
LTTNG_TRACEPOINT_EVENT_CLASS(sched_wakeup_template,

	TP_PROTO(struct task_struct *p),

	TP_ARGS(p),

	TP_FIELDS(
		ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, p->pid)
		ctf_integer(int, prio, p->prio - MAX_RT_PRIO)
		ctf_integer(int, target_cpu, task_cpu(p))
	)
)
#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0)) */
LTTNG_TRACEPOINT_EVENT_CLASS(sched_wakeup_template,

	/* Pre-2.6.35 kernels also passed the runqueue to the tracepoint. */
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35))
	TP_PROTO(struct task_struct *p, int success),

	TP_ARGS(p, success),
#else
	TP_PROTO(struct rq *rq, struct task_struct *p, int success),

	TP_ARGS(rq, p, success),
#endif

	TP_FIELDS(
		ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, p->pid)
		ctf_integer(int, prio, p->prio - MAX_RT_PRIO)
		ctf_integer(int, success, success)
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32))
		ctf_integer(int, target_cpu, task_cpu(p))
#endif
	)
)
#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0)) */
193
/*
 * Instantiate the wakeup events from sched_wakeup_template, using the
 * prototype matching the kernel version (same 4.3+/stable-RT condition
 * as the class definition above).
 */
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0) || \
	LTTNG_RT_KERNEL_RANGE(4,1,10,11, 4,2,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,18,27,26, 3,19,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,14,61,63, 3,15,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,12,54,73, 3,13,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,10,97,106, 3,11,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,4,110,139, 3,5,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,2,77,111, 3,3,0,0))

/*
 * Tracepoint called when waking a task; this tracepoint is guaranteed to be
 * called from the waking context.
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_waking,
	TP_PROTO(struct task_struct *p),
	TP_ARGS(p))

/*
 * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING.
 * It is not always called from the waking context.
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup,
	TP_PROTO(struct task_struct *p),
	TP_ARGS(p))

/*
 * Tracepoint for waking up a new task:
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup_new,
	TP_PROTO(struct task_struct *p),
	TP_ARGS(p))

#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35))

LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup,
	TP_PROTO(struct task_struct *p, int success),
	TP_ARGS(p, success))

/*
 * Tracepoint for waking up a new task:
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup_new,
	TP_PROTO(struct task_struct *p, int success),
	TP_ARGS(p, success))

#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)) */

LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup,
	TP_PROTO(struct rq *rq, struct task_struct *p, int success),
	TP_ARGS(rq, p, success))

/*
 * Tracepoint for waking up a new task:
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup_new,
	TP_PROTO(struct rq *rq, struct task_struct *p, int success),
	TP_ARGS(rq, p, success))

#endif /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)) */
253
/*
 * Tracepoint for task switches, performed by the scheduler.
 *
 * prev_state is derived via __trace_sched_switch_state() (defined above
 * per kernel version) so that a preempted task is reported as running.
 */
LTTNG_TRACEPOINT_EVENT(sched_switch,

	/* >= 4.4: the tracepoint supplies the 'preempt' flag itself. */
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0))
	TP_PROTO(bool preempt,
		 struct task_struct *prev,
		 struct task_struct *next),

	TP_ARGS(preempt, prev, next),
#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35))
	TP_PROTO(struct task_struct *prev,
		 struct task_struct *next),

	TP_ARGS(prev, next),
#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)) */
	TP_PROTO(struct rq *rq, struct task_struct *prev,
		 struct task_struct *next),

	TP_ARGS(rq, prev, next),
#endif /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)) */

	TP_FIELDS(
		ctf_array_text(char, prev_comm, prev->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, prev_tid, prev->pid)
		ctf_integer(int, prev_prio, prev->prio - MAX_RT_PRIO)
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0))
		ctf_integer(long, prev_state, __trace_sched_switch_state(preempt, prev))
#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35))
		ctf_integer(long, prev_state, __trace_sched_switch_state(prev))
#else
		ctf_integer(long, prev_state, prev->state)
#endif
		ctf_array_text(char, next_comm, next->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, next_tid, next->pid)
		ctf_integer(int, next_prio, next->prio - MAX_RT_PRIO)
	)
)
293
/*
 * Tracepoint for a task being migrated between CPUs.
 * orig_cpu is read via task_cpu(p) at trace time; dest_cpu is the
 * migration target passed by the caller.
 */
LTTNG_TRACEPOINT_EVENT(sched_migrate_task,

	TP_PROTO(struct task_struct *p, int dest_cpu),

	TP_ARGS(p, dest_cpu),

	TP_FIELDS(
		ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, p->pid)
		ctf_integer(int, prio, p->prio - MAX_RT_PRIO)
		ctf_integer(int, orig_cpu, task_cpu(p))
		ctf_integer(int, dest_cpu, dest_cpu)
	)
)
311
/*
 * Common event class for per-task lifecycle events (free, exit, wait):
 * records the task's command name, tid and priority.
 */
LTTNG_TRACEPOINT_EVENT_CLASS(sched_process_template,

	TP_PROTO(struct task_struct *p),

	TP_ARGS(p),

	TP_FIELDS(
		ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, p->pid)
		ctf_integer(int, prio, p->prio - MAX_RT_PRIO)
	)
)
324
/*
 * Tracepoint for freeing a task:
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_process_free,
	TP_PROTO(struct task_struct *p),
	TP_ARGS(p))


/*
 * Tracepoint for a task exiting:
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_process_exit,
	TP_PROTO(struct task_struct *p),
	TP_ARGS(p))

/*
 * Tracepoint for waiting on task to unschedule.
 * Pre-2.6.35 kernels pass the runqueue as an extra argument.
 */
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35))
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_wait_task,
	TP_PROTO(struct task_struct *p),
	TP_ARGS(p))
#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)) */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_wait_task,
	TP_PROTO(struct rq *rq, struct task_struct *p),
	TP_ARGS(rq, p))
#endif /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)) */
352
/*
 * Tracepoint for a waiting task.
 * comm and prio describe the waiter (current); tid is the pid being
 * waited for, resolved with pid_nr().
 */
LTTNG_TRACEPOINT_EVENT(sched_process_wait,

	TP_PROTO(struct pid *pid),

	TP_ARGS(pid),

	TP_FIELDS(
		ctf_array_text(char, comm, current->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, pid_nr(pid))
		ctf_integer(int, prio, current->prio - MAX_RT_PRIO)
	)
)
368
/*
 * Tracepoint for do_fork.
 * Saving both TID and PID information, especially for the child, allows
 * trace analyzers to distinguish between creation of a new process and
 * creation of a new thread. Newly created processes will have child_tid
 * == child_pid, while creation of a thread yields child_tid !=
 * child_pid.
 */
LTTNG_TRACEPOINT_EVENT_CODE(sched_process_fork,

	TP_PROTO(struct task_struct *parent, struct task_struct *child),

	TP_ARGS(parent, child),

	/*
	 * Per-event scratch space: the child's tid as seen from each pid
	 * namespace level, capped at LTTNG_MAX_PID_NS_LEVEL entries.
	 */
	TP_locvar(
		pid_t vtids[LTTNG_MAX_PID_NS_LEVEL];
		unsigned int ns_level;
	),

	/*
	 * Collect the child's per-namespace tids before the fields are
	 * serialized; ns_level is clamped so vtids never overflows.
	 */
	TP_code_pre(
		if (child) {
			struct pid *child_pid;
			unsigned int i;

			child_pid = task_pid(child);
			tp_locvar->ns_level =
				min_t(unsigned int, child_pid->level + 1,
					LTTNG_MAX_PID_NS_LEVEL);
			for (i = 0; i < tp_locvar->ns_level; i++)
				tp_locvar->vtids[i] = child_pid->numbers[i].nr;
		}
	),

	TP_FIELDS(
		ctf_array_text(char, parent_comm, parent->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, parent_tid, parent->pid)
		ctf_integer(pid_t, parent_pid, parent->tgid)
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0))
		/*
		 * Pid namespace inode number of the parent's active pid ns;
		 * 0 when unavailable. lttng_proc_inum maps to ns.inum or
		 * proc_inum depending on kernel version (see top of file).
		 */
		ctf_integer(unsigned int, parent_ns_inum,
			({
				unsigned int parent_ns_inum = 0;

				if (parent) {
					struct pid_namespace *pid_ns;

					pid_ns = task_active_pid_ns(parent);
					if (pid_ns)
						parent_ns_inum =
							pid_ns->lttng_proc_inum;
				}
				parent_ns_inum;
			}))
#endif
		ctf_array_text(char, child_comm, child->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, child_tid, child->pid)
		ctf_sequence(pid_t, vtids, tp_locvar->vtids, u8, tp_locvar->ns_level)
		ctf_integer(pid_t, child_pid, child->tgid)
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0))
		/* Same as parent_ns_inum, for the child's active pid ns. */
		ctf_integer(unsigned int, child_ns_inum,
			({
				unsigned int child_ns_inum = 0;

				if (child) {
					struct pid_namespace *pid_ns;

					pid_ns = task_active_pid_ns(child);
					if (pid_ns)
						child_ns_inum =
							pid_ns->lttng_proc_inum;
				}
				child_ns_inum;
			}))
#endif
	),

	TP_code_post()
)
446
/*
 * Tracepoint for sending a signal.
 * Only present on kernels < 2.6.33, where this tracepoint still existed
 * upstream.
 */
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33))
LTTNG_TRACEPOINT_EVENT(sched_signal_send,

	TP_PROTO(int sig, struct task_struct *p),

	TP_ARGS(sig, p),

	TP_FIELDS(
		ctf_integer(int, sig, sig)
		ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, p->pid)
	)
)
#endif
464
/*
 * Tracepoint for exec (kernels >= 3.4).
 * Records the new binary's filename, the tid after exec, and the tid
 * the task had before exec (old_tid).
 */
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,4,0))
LTTNG_TRACEPOINT_EVENT(sched_process_exec,

	TP_PROTO(struct task_struct *p, pid_t old_pid,
		 struct linux_binprm *bprm),

	TP_ARGS(p, old_pid, bprm),

	TP_FIELDS(
		ctf_string(filename, bprm->filename)
		ctf_integer(pid_t, tid, p->pid)
		ctf_integer(pid_t, old_tid, old_pid)
	)
)
#endif
483
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32))
/*
 * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
 * adding sched_stat support to SCHED_FIFO/RR would be welcome.
 */

/*
 * Common class for the sched_stat_* delay events: task identity plus a
 * delay value in the scheduler's time unit.
 */
LTTNG_TRACEPOINT_EVENT_CLASS(sched_stat_template,

	TP_PROTO(struct task_struct *tsk, u64 delay),

	TP_ARGS(tsk, delay),

	TP_FIELDS(
		ctf_array_text(char, comm, tsk->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, tsk->pid)
		ctf_integer(u64, delay, delay)
	)
)


/*
 * Tracepoint for accounting wait time (time the task is runnable
 * but not actually running due to scheduler contention).
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_wait,
	TP_PROTO(struct task_struct *tsk, u64 delay),
	TP_ARGS(tsk, delay))

/*
 * Tracepoint for accounting sleep time (time the task is not runnable,
 * including iowait, see below).
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_sleep,
	TP_PROTO(struct task_struct *tsk, u64 delay),
	TP_ARGS(tsk, delay))

/*
 * Tracepoint for accounting iowait time (time the task is not runnable
 * due to waiting on IO to complete).
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_iowait,
	TP_PROTO(struct task_struct *tsk, u64 delay),
	TP_ARGS(tsk, delay))

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,3,0))
/*
 * Tracepoint for accounting blocked time (time the task is in uninterruptible).
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_blocked,
	TP_PROTO(struct task_struct *tsk, u64 delay),
	TP_ARGS(tsk, delay))
#endif

/*
 * Tracepoint for accounting runtime (time the task is executing
 * on a CPU).
 */
LTTNG_TRACEPOINT_EVENT(sched_stat_runtime,

	TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),

	TP_ARGS(tsk, runtime, vruntime),

	TP_FIELDS(
		ctf_array_text(char, comm, tsk->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, tsk->pid)
		ctf_integer(u64, runtime, runtime)
		ctf_integer(u64, vruntime, vruntime)
	)
)
#endif
554
/*
 * Tracepoint for showing priority inheritance modifying a task's
 * priority.
 *
 * Kernels >= 4.12 (and the listed stable RT backports) changed the
 * tracepoint prototype to pass the boosting pi_task instead of an
 * integer newprio, so two variants are needed.
 */
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) || \
	LTTNG_RT_KERNEL_RANGE(4,9,27,18, 4,10,0,0) || \
	LTTNG_RT_KERNEL_RANGE(4,11,5,1, 4,12,0,0))
LTTNG_TRACEPOINT_EVENT(sched_pi_setprio,

	TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task),

	TP_ARGS(tsk, pi_task),

	TP_FIELDS(
		ctf_array_text(char, comm, tsk->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, tsk->pid)
		ctf_integer(int, oldprio, tsk->prio - MAX_RT_PRIO)
		/* No boosting task means the priority stays at tsk->prio. */
		ctf_integer(int, newprio, pi_task ? pi_task->prio - MAX_RT_PRIO : tsk->prio - MAX_RT_PRIO)
	)
)
#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37))
/*
 * 2.6.37 to < 4.12 variant: the new priority is passed directly.
 */
LTTNG_TRACEPOINT_EVENT(sched_pi_setprio,

	TP_PROTO(struct task_struct *tsk, int newprio),

	TP_ARGS(tsk, newprio),

	TP_FIELDS(
		ctf_array_text(char, comm, tsk->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, tsk->pid)
		ctf_integer(int, oldprio, tsk->prio - MAX_RT_PRIO)
		ctf_integer(int, newprio, newprio - MAX_RT_PRIO)
	)
)
#endif
594
595 #endif /* LTTNG_TRACE_SCHED_H */
596
597 /* This part must be outside protection */
598 #include <probes/define_trace.h>
This page took 0.042281 seconds and 4 git commands to generate.