Fix: update sched prev_state instrumentation for upstream kernel
[lttng-modules.git] instrumentation/events/lttng-module/sched.h
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM sched

#if !defined(LTTNG_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
#define LTTNG_TRACE_SCHED_H

#include <probes/lttng-tracepoint-event.h>
#include <linux/sched.h>
#include <linux/pid_namespace.h>
#include <linux/binfmts.h>
#include <linux/version.h>
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,9,0))
#include <linux/sched/rt.h>
#endif

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0))
#define lttng_proc_inum ns.inum
#else
#define lttng_proc_inum proc_inum
#endif
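/*
 * Note: kernel 3.19 moved the pid namespace's proc_inum field into the
 * embedded struct ns_common, hence the ns.inum indirection above.
 */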

#define LTTNG_MAX_PID_NS_LEVEL 32
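/*
 * The kernel limits pid namespace nesting (MAX_PID_NS_LEVEL, 32 at the time
 * of writing), so this bound covers the vtids sequence recorded by
 * sched_process_fork below.
 */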

#ifndef _TRACE_SCHED_DEF_
#define _TRACE_SCHED_DEF_

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,15,0))

static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
{
	unsigned int state;

#ifdef CONFIG_SCHED_DEBUG
	BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */

	/*
	 * Preemption ignores task state, therefore preempted tasks are always
	 * RUNNING (we will not have dequeued if state != RUNNING).
	 */
	if (preempt)
		return TASK_REPORT_MAX;

	/*
	 * task_state_index() uses fls() and returns a value from 0-8 range.
	 * Decrement it by 1 (except TASK_RUNNING state, i.e. 0) before using
	 * it for left shift operation to get the correct task->state
	 * mapping.
	 */
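	/*
	 * Worked example (assuming the usual task-state bit values): a task in
	 * TASK_UNINTERRUPTIBLE (0x2) yields task_state_index() == 2, and
	 * 1 << (2 - 1) == 0x2 recovers the original state bit, while
	 * TASK_RUNNING maps to index 0 and is returned unchanged.
	 */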
	state = task_state_index(p);

	return state ? (1 << (state - 1)) : state;
}

#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(4,14,0))

static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
{
	unsigned int state;

#ifdef CONFIG_SCHED_DEBUG
	BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */

	/*
	 * Preemption ignores task state, therefore preempted tasks are always
	 * RUNNING (we will not have dequeued if state != RUNNING).
	 */
	if (preempt)
		return TASK_REPORT_MAX;

	/*
	 * __get_task_state() uses fls() and returns a value from 0-8 range.
	 * Decrement it by 1 (except TASK_RUNNING state, i.e. 0) before using
	 * it for left shift operation to get the correct task->state
	 * mapping.
	 */
	state = __get_task_state(p);

	return state ? (1 << (state - 1)) : state;
}

#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0))

static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
{
#ifdef CONFIG_SCHED_DEBUG
	BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */
	/*
	 * Preemption ignores task state, therefore preempted tasks are always RUNNING
	 * (we will not have dequeued if state != RUNNING).
	 */
	return preempt ? TASK_RUNNING | TASK_STATE_MAX : p->state;
}
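/*
 * Note: OR-ing TASK_STATE_MAX into the reported state marks the record as a
 * preemption, letting trace viewers distinguish an involuntary switch from a
 * task that was simply TASK_RUNNING when it yielded the CPU.
 */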

#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0))

static inline long __trace_sched_switch_state(struct task_struct *p)
{
	long state = p->state;

#ifdef CONFIG_PREEMPT
#ifdef CONFIG_SCHED_DEBUG
	BUG_ON(p != current);
#endif /* CONFIG_SCHED_DEBUG */
	/*
	 * For all intents and purposes a preempted task is a running task.
	 */
	if (preempt_count() & PREEMPT_ACTIVE)
		state = TASK_RUNNING | TASK_STATE_MAX;
#endif /* CONFIG_PREEMPT */

	return state;
}

#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3,13,0))

static inline long __trace_sched_switch_state(struct task_struct *p)
{
	long state = p->state;

#ifdef CONFIG_PREEMPT
	/*
	 * For all intents and purposes a preempted task is a running task.
	 */
	if (task_preempt_count(p) & PREEMPT_ACTIVE)
		state = TASK_RUNNING | TASK_STATE_MAX;
#endif

	return state;
}

#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0))

static inline long __trace_sched_switch_state(struct task_struct *p)
{
	long state = p->state;

#ifdef CONFIG_PREEMPT
	/*
	 * For all intents and purposes a preempted task is a running task.
	 */
	if (task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)
		state = TASK_RUNNING | TASK_STATE_MAX;
#endif

	return state;
}

#else

static inline long __trace_sched_switch_state(struct task_struct *p)
{
	long state = p->state;

#ifdef CONFIG_PREEMPT
	/*
	 * For all intents and purposes a preempted task is a running task.
	 */
	if (task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)
		state = TASK_RUNNING;
#endif

	return state;
}

#endif

#endif /* _TRACE_SCHED_DEF_ */

/*
 * Tracepoint for calling kthread_stop, performed to end a kthread:
 */
LTTNG_TRACEPOINT_EVENT(sched_kthread_stop,

	TP_PROTO(struct task_struct *t),

	TP_ARGS(t),

	TP_FIELDS(
		ctf_array_text(char, comm, t->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, t->pid)
	)
)

/*
 * Tracepoint for the return value of the kthread stopping:
 */
LTTNG_TRACEPOINT_EVENT(sched_kthread_stop_ret,

	TP_PROTO(int ret),

	TP_ARGS(ret),

	TP_FIELDS(
		ctf_integer(int, ret, ret)
	)
)

/*
 * Tracepoint for waking up a task:
 */
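/*
 * The LTTNG_RT_KERNEL_RANGE() entries below cover PREEMPT_RT kernels that
 * backported the 4.3 scheduler instrumentation changes (sched_waking added,
 * the success argument dropped from sched_wakeup).
 */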
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0) || \
	LTTNG_RT_KERNEL_RANGE(4,1,10,11, 4,2,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,18,27,26, 3,19,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,14,61,63, 3,15,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,12,54,73, 3,13,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,10,97,106, 3,11,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,4,110,139, 3,5,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,2,77,111, 3,3,0,0))
LTTNG_TRACEPOINT_EVENT_CLASS(sched_wakeup_template,

	TP_PROTO(struct task_struct *p),

	TP_ARGS(p),

	TP_FIELDS(
		ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, p->pid)
		ctf_integer(int, prio, p->prio - MAX_RT_PRIO)
		ctf_integer(int, target_cpu, task_cpu(p))
	)
)
#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0)) */
LTTNG_TRACEPOINT_EVENT_CLASS(sched_wakeup_template,

	TP_PROTO(struct task_struct *p, int success),

	TP_ARGS(p, success),

	TP_FIELDS(
		ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, p->pid)
		ctf_integer(int, prio, p->prio - MAX_RT_PRIO)
		ctf_integer(int, success, success)
		ctf_integer(int, target_cpu, task_cpu(p))
	)
)
#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0)) */
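/*
 * Note on the prio fields recorded here and below: subtracting MAX_RT_PRIO
 * normalizes the kernel's internal priority so that SCHED_OTHER tasks report
 * 0..39 (nice -20..19) and real-time tasks report negative values.
 */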

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0) || \
	LTTNG_RT_KERNEL_RANGE(4,1,10,11, 4,2,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,18,27,26, 3,19,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,14,61,63, 3,15,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,12,54,73, 3,13,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,10,97,106, 3,11,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,4,110,139, 3,5,0,0) || \
	LTTNG_RT_KERNEL_RANGE(3,2,77,111, 3,3,0,0))

/*
 * Tracepoint called when waking a task; this tracepoint is guaranteed to be
 * called from the waking context.
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_waking,
	TP_PROTO(struct task_struct *p),
	TP_ARGS(p))

/*
 * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING.
 * It is not always called from the waking context.
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup,
	TP_PROTO(struct task_struct *p),
	TP_ARGS(p))

/*
 * Tracepoint for waking up a new task:
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup_new,
	TP_PROTO(struct task_struct *p),
	TP_ARGS(p))

#else

LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup,
	TP_PROTO(struct task_struct *p, int success),
	TP_ARGS(p, success))

/*
 * Tracepoint for waking up a new task:
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup_new,
	TP_PROTO(struct task_struct *p, int success),
	TP_ARGS(p, success))

#endif /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0)) */

/*
 * Tracepoint for task switches, performed by the scheduler:
 */
LTTNG_TRACEPOINT_EVENT(sched_switch,

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0))
	TP_PROTO(bool preempt,
		 struct task_struct *prev,
		 struct task_struct *next),

	TP_ARGS(preempt, prev, next),
#else
	TP_PROTO(struct task_struct *prev,
		 struct task_struct *next),

	TP_ARGS(prev, next),
#endif /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0)) */

	TP_FIELDS(
		ctf_array_text(char, prev_comm, prev->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, prev_tid, prev->pid)
		ctf_integer(int, prev_prio, prev->prio - MAX_RT_PRIO)
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0))
		ctf_integer(long, prev_state, __trace_sched_switch_state(preempt, prev))
#else
		ctf_integer(long, prev_state, __trace_sched_switch_state(prev))
#endif
		ctf_array_text(char, next_comm, next->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, next_tid, next->pid)
		ctf_integer(int, next_prio, next->prio - MAX_RT_PRIO)
	)
)

/*
 * Tracepoint for a task being migrated:
 */
LTTNG_TRACEPOINT_EVENT(sched_migrate_task,

	TP_PROTO(struct task_struct *p, int dest_cpu),

	TP_ARGS(p, dest_cpu),

	TP_FIELDS(
		ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, p->pid)
		ctf_integer(int, prio, p->prio - MAX_RT_PRIO)
		ctf_integer(int, orig_cpu, task_cpu(p))
		ctf_integer(int, dest_cpu, dest_cpu)
	)
)

LTTNG_TRACEPOINT_EVENT_CLASS(sched_process_template,

	TP_PROTO(struct task_struct *p),

	TP_ARGS(p),

	TP_FIELDS(
		ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, p->pid)
		ctf_integer(int, prio, p->prio - MAX_RT_PRIO)
	)
)

/*
 * Tracepoint for freeing a task:
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_process_free,
	TP_PROTO(struct task_struct *p),
	TP_ARGS(p))


/*
 * Tracepoint for a task exiting:
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_process_exit,
	TP_PROTO(struct task_struct *p),
	TP_ARGS(p))

/*
 * Tracepoint for waiting on task to unschedule:
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_wait_task,
	TP_PROTO(struct task_struct *p),
	TP_ARGS(p))

/*
 * Tracepoint for a waiting task:
 */
LTTNG_TRACEPOINT_EVENT(sched_process_wait,

	TP_PROTO(struct pid *pid),

	TP_ARGS(pid),

	TP_FIELDS(
		ctf_array_text(char, comm, current->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, pid_nr(pid))
		ctf_integer(int, prio, current->prio - MAX_RT_PRIO)
	)
)

/*
 * Tracepoint for do_fork.
 * Saving both TID and PID information, especially for the child, allows
 * trace analyzers to distinguish between creation of a new process and
 * creation of a new thread. Newly created processes will have child_tid
 * == child_pid, while creation of a thread yields child_tid !=
 * child_pid.
 */
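/*
 * For example, a fork() from pid 1234 records child_pid == child_tid ==
 * <new pid>, while a pthread_create() from the same process records
 * child_pid == 1234 and child_tid == <new tid>.
 */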
LTTNG_TRACEPOINT_EVENT_CODE(sched_process_fork,

	TP_PROTO(struct task_struct *parent, struct task_struct *child),

	TP_ARGS(parent, child),

	TP_locvar(
		pid_t vtids[LTTNG_MAX_PID_NS_LEVEL];
		unsigned int ns_level;
	),

	TP_code_pre(
		if (child) {
			struct pid *child_pid;
			unsigned int i;

			child_pid = task_pid(child);
			tp_locvar->ns_level =
				min_t(unsigned int, child_pid->level + 1,
					LTTNG_MAX_PID_NS_LEVEL);
			for (i = 0; i < tp_locvar->ns_level; i++)
				tp_locvar->vtids[i] = child_pid->numbers[i].nr;
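			/*
			 * At this point vtids[0] is the child's pid as seen
			 * from the initial pid namespace and
			 * vtids[ns_level - 1] is the pid in the child's own
			 * (innermost) namespace.
			 */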
		}
	),

	TP_FIELDS(
		ctf_array_text(char, parent_comm, parent->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, parent_tid, parent->pid)
		ctf_integer(pid_t, parent_pid, parent->tgid)
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0))
		ctf_integer(unsigned int, parent_ns_inum,
			({
				unsigned int parent_ns_inum = 0;

				if (parent) {
					struct pid_namespace *pid_ns;

					pid_ns = task_active_pid_ns(parent);
					if (pid_ns)
						parent_ns_inum =
							pid_ns->lttng_proc_inum;
				}
				parent_ns_inum;
			}))
#endif
		ctf_array_text(char, child_comm, child->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, child_tid, child->pid)
		ctf_sequence(pid_t, vtids, tp_locvar->vtids, u8, tp_locvar->ns_level)
		ctf_integer(pid_t, child_pid, child->tgid)
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0))
		ctf_integer(unsigned int, child_ns_inum,
			({
				unsigned int child_ns_inum = 0;

				if (child) {
					struct pid_namespace *pid_ns;

					pid_ns = task_active_pid_ns(child);
					if (pid_ns)
						child_ns_inum =
							pid_ns->lttng_proc_inum;
				}
				child_ns_inum;
			}))
#endif
	),

	TP_code_post()
)

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,4,0))
/*
 * Tracepoint for exec:
 */
LTTNG_TRACEPOINT_EVENT(sched_process_exec,

	TP_PROTO(struct task_struct *p, pid_t old_pid,
		 struct linux_binprm *bprm),

	TP_ARGS(p, old_pid, bprm),

	TP_FIELDS(
		ctf_string(filename, bprm->filename)
		ctf_integer(pid_t, tid, p->pid)
		ctf_integer(pid_t, old_tid, old_pid)
	)
)
#endif

/*
 * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE;
 * adding sched_stat support to SCHED_FIFO/RR would be welcome.
 */
LTTNG_TRACEPOINT_EVENT_CLASS(sched_stat_template,

	TP_PROTO(struct task_struct *tsk, u64 delay),

	TP_ARGS(tsk, delay),

	TP_FIELDS(
		ctf_array_text(char, comm, tsk->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, tsk->pid)
		ctf_integer(u64, delay, delay)
	)
)


/*
 * Tracepoint for accounting wait time (time the task is runnable
 * but not actually running due to scheduler contention).
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_wait,
	TP_PROTO(struct task_struct *tsk, u64 delay),
	TP_ARGS(tsk, delay))

/*
 * Tracepoint for accounting sleep time (time the task is not runnable,
 * including iowait, see below).
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_sleep,
	TP_PROTO(struct task_struct *tsk, u64 delay),
	TP_ARGS(tsk, delay))

/*
 * Tracepoint for accounting iowait time (time the task is not runnable
 * due to waiting on IO to complete).
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_iowait,
	TP_PROTO(struct task_struct *tsk, u64 delay),
	TP_ARGS(tsk, delay))

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,3,0))
/*
 * Tracepoint for accounting blocked time (time the task is in uninterruptible sleep).
 */
LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_blocked,
	TP_PROTO(struct task_struct *tsk, u64 delay),
	TP_ARGS(tsk, delay))
#endif

/*
 * Tracepoint for accounting runtime (time the task is executing
 * on a CPU).
 */
LTTNG_TRACEPOINT_EVENT(sched_stat_runtime,

	TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),

	TP_ARGS(tsk, runtime, vruntime),

	TP_FIELDS(
		ctf_array_text(char, comm, tsk->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, tsk->pid)
		ctf_integer(u64, runtime, runtime)
		ctf_integer(u64, vruntime, vruntime)
	)
)

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) || \
	LTTNG_RT_KERNEL_RANGE(4,9,27,18, 4,10,0,0) || \
	LTTNG_RT_KERNEL_RANGE(4,11,5,1, 4,12,0,0))
/*
 * Tracepoint for showing priority inheritance modifying a task's
 * priority.
 */
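/*
 * Since 4.12 the tracepoint receives the top priority-boosting task (pi_task)
 * rather than a raw priority; a NULL pi_task means the boost is being
 * removed, which is why newprio falls back to tsk->prio below.
 */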
LTTNG_TRACEPOINT_EVENT(sched_pi_setprio,

	TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task),

	TP_ARGS(tsk, pi_task),

	TP_FIELDS(
		ctf_array_text(char, comm, tsk->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, tsk->pid)
		ctf_integer(int, oldprio, tsk->prio - MAX_RT_PRIO)
		ctf_integer(int, newprio, pi_task ? pi_task->prio - MAX_RT_PRIO : tsk->prio - MAX_RT_PRIO)
	)
)
#else
/*
 * Tracepoint for showing priority inheritance modifying a task's
 * priority.
 */
LTTNG_TRACEPOINT_EVENT(sched_pi_setprio,

	TP_PROTO(struct task_struct *tsk, int newprio),

	TP_ARGS(tsk, newprio),

	TP_FIELDS(
		ctf_array_text(char, comm, tsk->comm, TASK_COMM_LEN)
		ctf_integer(pid_t, tid, tsk->pid)
		ctf_integer(int, oldprio, tsk->prio - MAX_RT_PRIO)
		ctf_integer(int, newprio, newprio - MAX_RT_PRIO)
	)
)
#endif

#endif /* LTTNG_TRACE_SCHED_H */

/* This part must be outside protection */
#include <probes/define_trace.h>