Fix: sched_stat_runtime changed in linux 6.8.0-rc1
[lttng-modules.git] / include / instrumentation / events / sched.h
... / ...
CommitLineData
1/* SPDX-License-Identifier: GPL-2.0-only */
2#undef TRACE_SYSTEM
3#define TRACE_SYSTEM sched
4
5#if !defined(LTTNG_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
6#define LTTNG_TRACE_SCHED_H
7
8#include <lttng/tracepoint-event.h>
9#include <linux/sched.h>
10#include <linux/pid_namespace.h>
11#include <linux/binfmts.h>
12#include <lttng/kernel-version.h>
13#include <linux/sched/rt.h>
14
/*
 * Upper bound on the number of PID namespace levels recorded per fork event:
 * it sizes the vtids array and caps ns_level in sched_process_fork.
 */
15#define LTTNG_MAX_PID_NS_LEVEL 32
16
17#ifndef _TRACE_SCHED_DEF_
18#define _TRACE_SCHED_DEF_
19
20#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(5,18,0) \
21 || LTTNG_RHEL_KERNEL_RANGE(5,14,0,162,0,0, 5,15,0,0,0,0))
22
23static inline long __trace_sched_switch_state(bool preempt,
24 unsigned int prev_state,
25 struct task_struct *p)
26{
27 unsigned int state;
28
29#ifdef CONFIG_SCHED_DEBUG
30 BUG_ON(p != current);
31#endif /* CONFIG_SCHED_DEBUG */
32
33 /*
34 * Preemption ignores task state, therefore preempted tasks are always
35 * RUNNING (we will not have dequeued if state != RUNNING).
36 */
37 if (preempt)
38 return TASK_REPORT_MAX;
39
40 /*
41 * task_state_index() uses fls() and returns a value from 0-8 range.
42 * Decrement it by 1 (except TASK_RUNNING state i.e 0) before using
43 * it for left shift operation to get the correct task->state
44 * mapping.
45 */
46 state = __task_state_index(prev_state, p->exit_state);
47
48 return state ? (1 << (state - 1)) : state;
49}
50
51#elif (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,15,0))
52
53static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
54{
55 unsigned int state;
56
57#ifdef CONFIG_SCHED_DEBUG
58 BUG_ON(p != current);
59#endif /* CONFIG_SCHED_DEBUG */
60
61 /*
62 * Preemption ignores task state, therefore preempted tasks are always
63 * RUNNING (we will not have dequeued if state != RUNNING).
64 */
65 if (preempt)
66 return TASK_REPORT_MAX;
67
68 /*
69 * task_state_index() uses fls() and returns a value from 0-8 range.
70 * Decrement it by 1 (except TASK_RUNNING state i.e 0) before using
71 * it for left shift operation to get the correct task->state
72 * mapping.
73 */
74 state = task_state_index(p);
75
76 return state ? (1 << (state - 1)) : state;
77}
78
79#elif (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,14,0))
80
81static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
82{
83 unsigned int state;
84
85#ifdef CONFIG_SCHED_DEBUG
86 BUG_ON(p != current);
87#endif /* CONFIG_SCHED_DEBUG */
88
89 /*
90 * Preemption ignores task state, therefore preempted tasks are always
91 * RUNNING (we will not have dequeued if state != RUNNING).
92 */
93 if (preempt)
94 return TASK_REPORT_MAX;
95
96 /*
97 * __get_task_state() uses fls() and returns a value from 0-8 range.
98 * Decrement it by 1 (except TASK_RUNNING state i.e 0) before using
99 * it for left shift operation to get the correct task->state
100 * mapping.
101 */
102 state = __get_task_state(p);
103
104 return state ? (1 << (state - 1)) : state;
105}
106
107#else
108
109static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
110{
111#ifdef CONFIG_SCHED_DEBUG
112 BUG_ON(p != current);
113#endif /* CONFIG_SCHED_DEBUG */
114 /*
115 * Preemption ignores task state, therefore preempted tasks are always RUNNING
116 * (we will not have dequeued if state != RUNNING).
117 */
118 return preempt ? TASK_RUNNING | TASK_STATE_MAX : p->state;
119}
120#endif
121
122#endif /* _TRACE_SCHED_DEF_ */
123
124#ifdef CONFIG_LTTNG_EXPERIMENTAL_BITWISE_ENUM
125/*
126 * Enumeration of the task state bitmask.
127 * Only bit flags are enumerated here, not composition of states.
128 */
129LTTNG_TRACEPOINT_ENUM(task_state,
130 TP_ENUM_VALUES(
131 ctf_enum_value("TASK_RUNNING", TASK_RUNNING)
132 ctf_enum_value("TASK_INTERRUPTIBLE", TASK_INTERRUPTIBLE)
133 ctf_enum_value("TASK_UNINTERRUPTIBLE", TASK_UNINTERRUPTIBLE)
134 ctf_enum_value("TASK_STOPPED", __TASK_STOPPED)
135 ctf_enum_value("TASK_TRACED", __TASK_TRACED)
136 ctf_enum_value("EXIT_DEAD", EXIT_DEAD)
137 ctf_enum_value("EXIT_ZOMBIE", EXIT_ZOMBIE)
138 ctf_enum_value("TASK_PARKED", TASK_PARKED)
139 ctf_enum_value("TASK_DEAD", TASK_DEAD)
140 ctf_enum_value("TASK_WAKEKILL", TASK_WAKEKILL)
141 ctf_enum_value("TASK_WAKING", TASK_WAKING)
142 ctf_enum_value("TASK_NOLOAD", TASK_NOLOAD)
143
144#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,8,0))
145 ctf_enum_value("TASK_NEW", TASK_NEW)
146#endif /* #if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,8,0)) */
147
148 ctf_enum_value("TASK_STATE_MAX", TASK_STATE_MAX)
149 )
150)
151#endif /* CONFIG_LTTNG_EXPERIMENTAL_BITWISE_ENUM */
152
153/*
154 * Tracepoint for calling kthread_stop, performed to end a kthread:
155 */
156LTTNG_TRACEPOINT_EVENT(sched_kthread_stop,
157
158 TP_PROTO(struct task_struct *t),
159
160 TP_ARGS(t),
161
162 TP_FIELDS(
163 ctf_array_text(char, comm, t->comm, TASK_COMM_LEN)
164 ctf_integer(pid_t, tid, t->pid)
165 )
166)
167
168/*
169 * Tracepoint for the return value of the kthread stopping:
170 */
171LTTNG_TRACEPOINT_EVENT(sched_kthread_stop_ret,
172
173 TP_PROTO(int ret),
174
175 TP_ARGS(ret),
176
177 TP_FIELDS(
178 ctf_integer(int, ret, ret)
179 )
180)
181
182/*
183 * Tracepoint for waking up a task:
184 */
185LTTNG_TRACEPOINT_EVENT_CLASS(sched_wakeup_template,
186
187 TP_PROTO(struct task_struct *p),
188
189 TP_ARGS(p),
190
191 TP_FIELDS(
192 ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
193 ctf_integer(pid_t, tid, p->pid)
194 ctf_integer(int, prio, p->prio - MAX_RT_PRIO)
195 ctf_integer(int, target_cpu, task_cpu(p))
196 )
197)
198
199/*
200 * Tracepoint called when waking a task; this tracepoint is guaranteed to be
201 * called from the waking context.
202 */
203LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_waking,
204 TP_PROTO(struct task_struct *p),
205 TP_ARGS(p))
206
207/*
208 * Tracepoint called when the task is actually woken; p->state == TASK_RUNNNG.
209 * It it not always called from the waking context.
210 */
211LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup,
212 TP_PROTO(struct task_struct *p),
213 TP_ARGS(p))
214
215/*
216 * Tracepoint for waking up a new task:
217 */
218LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup_new,
219 TP_PROTO(struct task_struct *p),
220 TP_ARGS(p))
221
222/*
223 * Tracepoint for task switches, performed by the scheduler:
224 */
225
226#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(5,18,0) \
227 || LTTNG_RHEL_KERNEL_RANGE(5,14,0,162,0,0, 5,15,0,0,0,0))
228LTTNG_TRACEPOINT_EVENT(sched_switch,
229
230 TP_PROTO(bool preempt,
231 struct task_struct *prev,
232 struct task_struct *next,
233 unsigned int prev_state),
234
235 TP_ARGS(preempt, prev, next, prev_state),
236
237 TP_FIELDS(
238 ctf_array_text(char, prev_comm, prev->comm, TASK_COMM_LEN)
239 ctf_integer(pid_t, prev_tid, prev->pid)
240 ctf_integer(int, prev_prio, prev->prio - MAX_RT_PRIO)
241#ifdef CONFIG_LTTNG_EXPERIMENTAL_BITWISE_ENUM
242 ctf_enum(task_state, long, prev_state, __trace_sched_switch_state(preempt, prev_state, prev))
243#else
244 ctf_integer(long, prev_state, __trace_sched_switch_state(preempt, prev_state, prev))
245#endif
246 ctf_array_text(char, next_comm, next->comm, TASK_COMM_LEN)
247 ctf_integer(pid_t, next_tid, next->pid)
248 ctf_integer(int, next_prio, next->prio - MAX_RT_PRIO)
249 )
250)
251
252#else
253
254LTTNG_TRACEPOINT_EVENT(sched_switch,
255
256 TP_PROTO(bool preempt,
257 struct task_struct *prev,
258 struct task_struct *next),
259
260 TP_ARGS(preempt, prev, next),
261
262 TP_FIELDS(
263 ctf_array_text(char, prev_comm, prev->comm, TASK_COMM_LEN)
264 ctf_integer(pid_t, prev_tid, prev->pid)
265 ctf_integer(int, prev_prio, prev->prio - MAX_RT_PRIO)
266#ifdef CONFIG_LTTNG_EXPERIMENTAL_BITWISE_ENUM
267 ctf_enum(task_state, long, prev_state, __trace_sched_switch_state(preempt, prev))
268#else
269 ctf_integer(long, prev_state, __trace_sched_switch_state(preempt, prev))
270#endif
271 ctf_array_text(char, next_comm, next->comm, TASK_COMM_LEN)
272 ctf_integer(pid_t, next_tid, next->pid)
273 ctf_integer(int, next_prio, next->prio - MAX_RT_PRIO)
274 )
275)
276#endif
277
278/*
279 * Tracepoint for a task being migrated:
280 */
281LTTNG_TRACEPOINT_EVENT(sched_migrate_task,
282
283 TP_PROTO(struct task_struct *p, int dest_cpu),
284
285 TP_ARGS(p, dest_cpu),
286
287 TP_FIELDS(
288 ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
289 ctf_integer(pid_t, tid, p->pid)
290 ctf_integer(int, prio, p->prio - MAX_RT_PRIO)
291 ctf_integer(int, orig_cpu, task_cpu(p))
292 ctf_integer(int, dest_cpu, dest_cpu)
293 )
294)
295
296LTTNG_TRACEPOINT_EVENT_CLASS(sched_process_template,
297
298 TP_PROTO(struct task_struct *p),
299
300 TP_ARGS(p),
301
302 TP_FIELDS(
303 ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
304 ctf_integer(pid_t, tid, p->pid)
305 ctf_integer(int, prio, p->prio - MAX_RT_PRIO)
306 )
307)
308
309/*
310 * Tracepoint for freeing a task:
311 */
312LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_process_free,
313 TP_PROTO(struct task_struct *p),
314 TP_ARGS(p))
315
316
317/*
318 * Tracepoint for a task exiting:
319 */
320LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_process_exit,
321 TP_PROTO(struct task_struct *p),
322 TP_ARGS(p))
323
324/*
325 * Tracepoint for waiting on task to unschedule:
326 */
327LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_wait_task,
328 TP_PROTO(struct task_struct *p),
329 TP_ARGS(p))
330
331/*
332 * Tracepoint for a waiting task:
333 */
334LTTNG_TRACEPOINT_EVENT(sched_process_wait,
335
336 TP_PROTO(struct pid *pid),
337
338 TP_ARGS(pid),
339
340 TP_FIELDS(
341 ctf_array_text(char, comm, current->comm, TASK_COMM_LEN)
342 ctf_integer(pid_t, tid, pid_nr(pid))
343 ctf_integer(int, prio, current->prio - MAX_RT_PRIO)
344 )
345)
346
347/*
348 * Tracepoint for do_fork.
349 * Saving both TID and PID information, especially for the child, allows
350 * trace analyzers to distinguish between creation of a new process and
351 * creation of a new thread. Newly created processes will have child_tid
352 * == child_pid, while creation of a thread yields to child_tid !=
353 * child_pid.
354 */
355LTTNG_TRACEPOINT_EVENT_CODE(sched_process_fork,
356
357 TP_PROTO(struct task_struct *parent, struct task_struct *child),
358
359 TP_ARGS(parent, child),
360
361 TP_locvar(
362 pid_t vtids[LTTNG_MAX_PID_NS_LEVEL];
363 unsigned int ns_level;
364 ),
365
366 TP_code_pre(
367 if (child) {
368 struct pid *child_pid;
369 unsigned int i;
370
371 child_pid = task_pid(child);
372 tp_locvar->ns_level =
373 min_t(unsigned int, child_pid->level + 1,
374 LTTNG_MAX_PID_NS_LEVEL);
375 for (i = 0; i < tp_locvar->ns_level; i++)
376 tp_locvar->vtids[i] = child_pid->numbers[i].nr;
377 }
378 ),
379
380 TP_FIELDS(
381 ctf_array_text(char, parent_comm, parent->comm, TASK_COMM_LEN)
382 ctf_integer(pid_t, parent_tid, parent->pid)
383 ctf_integer(pid_t, parent_pid, parent->tgid)
384 ctf_integer(unsigned int, parent_ns_inum,
385 ({
386 unsigned int parent_ns_inum = 0;
387
388 if (parent) {
389 struct pid_namespace *pid_ns;
390
391 pid_ns = task_active_pid_ns(parent);
392 if (pid_ns)
393 parent_ns_inum =
394 pid_ns->ns.inum;
395 }
396 parent_ns_inum;
397 }))
398 ctf_array_text(char, child_comm, child->comm, TASK_COMM_LEN)
399 ctf_integer(pid_t, child_tid, child->pid)
400 ctf_sequence(pid_t, vtids, tp_locvar->vtids, u8, tp_locvar->ns_level)
401 ctf_integer(pid_t, child_pid, child->tgid)
402 ctf_integer(unsigned int, child_ns_inum,
403 ({
404 unsigned int child_ns_inum = 0;
405
406 if (child) {
407 struct pid_namespace *pid_ns;
408
409 pid_ns = task_active_pid_ns(child);
410 if (pid_ns)
411 child_ns_inum =
412 pid_ns->ns.inum;
413 }
414 child_ns_inum;
415 }))
416 ),
417
418 TP_code_post()
419)
420
421/*
422 * Tracepoint for exec:
423 */
424LTTNG_TRACEPOINT_EVENT(sched_process_exec,
425
426 TP_PROTO(struct task_struct *p, pid_t old_pid,
427 struct linux_binprm *bprm),
428
429 TP_ARGS(p, old_pid, bprm),
430
431 TP_FIELDS(
432 ctf_string(filename, bprm->filename)
433 ctf_integer(pid_t, tid, p->pid)
434 ctf_integer(pid_t, old_tid, old_pid)
435 )
436)
437
438/*
439 * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
440 * adding sched_stat support to SCHED_FIFO/RR would be welcome.
441 */
442LTTNG_TRACEPOINT_EVENT_CLASS(sched_stat_template,
443
444 TP_PROTO(struct task_struct *tsk, u64 delay),
445
446 TP_ARGS(tsk, delay),
447
448 TP_FIELDS(
449 ctf_array_text(char, comm, tsk->comm, TASK_COMM_LEN)
450 ctf_integer(pid_t, tid, tsk->pid)
451 ctf_integer(u64, delay, delay)
452 )
453)
454
455
456/*
457 * Tracepoint for accounting wait time (time the task is runnable
458 * but not actually running due to scheduler contention).
459 */
460LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_wait,
461 TP_PROTO(struct task_struct *tsk, u64 delay),
462 TP_ARGS(tsk, delay))
463
464/*
465 * Tracepoint for accounting sleep time (time the task is not runnable,
466 * including iowait, see below).
467 */
468LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_sleep,
469 TP_PROTO(struct task_struct *tsk, u64 delay),
470 TP_ARGS(tsk, delay))
471
472/*
473 * Tracepoint for accounting iowait time (time the task is not runnable
474 * due to waiting on IO to complete).
475 */
476LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_iowait,
477 TP_PROTO(struct task_struct *tsk, u64 delay),
478 TP_ARGS(tsk, delay))
479
480/*
481 * Tracepoint for accounting blocked time (time the task is in uninterruptible).
482 */
483LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_stat_template, sched_stat_blocked,
484 TP_PROTO(struct task_struct *tsk, u64 delay),
485 TP_ARGS(tsk, delay))
486
487#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(6,8,0))
488/*
489 * Tracepoint for accounting runtime (time the task is executing
490 * on a CPU).
491 */
492LTTNG_TRACEPOINT_EVENT(sched_stat_runtime,
493
494 TP_PROTO(struct task_struct *tsk, u64 runtime),
495
496 TP_ARGS(tsk, runtime),
497
498 TP_FIELDS(
499 ctf_array_text(char, comm, tsk->comm, TASK_COMM_LEN)
500 ctf_integer(pid_t, tid, tsk->pid)
501 ctf_integer(u64, runtime, runtime)
502 )
503)
504#else
505/*
506 * Tracepoint for accounting runtime (time the task is executing
507 * on a CPU).
508 */
509LTTNG_TRACEPOINT_EVENT(sched_stat_runtime,
510
511 TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
512
513 TP_ARGS(tsk, runtime, vruntime),
514
515 TP_FIELDS(
516 ctf_array_text(char, comm, tsk->comm, TASK_COMM_LEN)
517 ctf_integer(pid_t, tid, tsk->pid)
518 ctf_integer(u64, runtime, runtime)
519 ctf_integer(u64, vruntime, vruntime)
520 )
521)
522#endif
523
524#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,12,0) || \
525 LTTNG_RT_KERNEL_RANGE(4,9,27,18, 4,10,0,0) || \
526 LTTNG_RT_KERNEL_RANGE(4,11,5,1, 4,12,0,0))
527/*
528 * Tracepoint for showing priority inheritance modifying a tasks
529 * priority.
530 */
531LTTNG_TRACEPOINT_EVENT(sched_pi_setprio,
532
533 TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task),
534
535 TP_ARGS(tsk, pi_task),
536
537 TP_FIELDS(
538 ctf_array_text(char, comm, tsk->comm, TASK_COMM_LEN)
539 ctf_integer(pid_t, tid, tsk->pid)
540 ctf_integer(int, oldprio, tsk->prio - MAX_RT_PRIO)
541 ctf_integer(int, newprio, pi_task ? pi_task->prio - MAX_RT_PRIO : tsk->prio - MAX_RT_PRIO)
542 )
543)
544#else
545/*
546 * Tracepoint for showing priority inheritance modifying a tasks
547 * priority.
548 */
549LTTNG_TRACEPOINT_EVENT(sched_pi_setprio,
550
551 TP_PROTO(struct task_struct *tsk, int newprio),
552
553 TP_ARGS(tsk, newprio),
554
555 TP_FIELDS(
556 ctf_array_text(char, comm, tsk->comm, TASK_COMM_LEN)
557 ctf_integer(pid_t, tid, tsk->pid)
558 ctf_integer(int, oldprio, tsk->prio - MAX_RT_PRIO)
559 ctf_integer(int, newprio, newprio - MAX_RT_PRIO)
560 )
561)
562#endif
563
564#endif /* LTTNG_TRACE_SCHED_H */
565
566/* This part must be outside protection */
567#include <lttng/define_trace.h>
This page took 0.024694 seconds and 4 git commands to generate.