Fix: update sched prev_state instrumentation for upstream kernel
[lttng-modules.git] / instrumentation / events / lttng-module / sched.h
index ac61bcef3d4a79335a53166574d9b39a99c79813..5b4313a98983bc71cbc649c3e84812aca9921acd 100644 (file)
+/* SPDX-License-Identifier: GPL-2.0 */
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM sched
 
 #if !defined(LTTNG_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
 #define LTTNG_TRACE_SCHED_H
 
-#include "../../../probes/lttng-tracepoint-event.h"
+#include <probes/lttng-tracepoint-event.h>
 #include <linux/sched.h>
+#include <linux/pid_namespace.h>
 #include <linux/binfmts.h>
 #include <linux/version.h>
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,9,0))
 #include <linux/sched/rt.h>
 #endif
 
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0))
+#define lttng_proc_inum ns.inum
+#else
+#define lttng_proc_inum proc_inum
+#endif
+
+#define LTTNG_MAX_PID_NS_LEVEL 32
+
 #ifndef _TRACE_SCHED_DEF_
 #define _TRACE_SCHED_DEF_
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0))
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,15,0))
+
+static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
+{
+        unsigned int state;
+
+#ifdef CONFIG_SCHED_DEBUG
+        BUG_ON(p != current);
+#endif /* CONFIG_SCHED_DEBUG */
+
+        /*
+         * Compute the value recorded in the prev_state field of the
+         * sched_switch event on kernels >= 4.15 (see the enclosing
+         * version check).
+         *
+         * Preemption ignores task state, therefore preempted tasks are always
+         * RUNNING (we will not have dequeued if state != RUNNING).
+         * A preempted task is reported as TASK_REPORT_MAX instead of going
+         * through the state-index mapping below.
+         */
+        if (preempt)
+                return TASK_REPORT_MAX;
+
+        /*
+         * task_state_index() uses fls() and returns a value from 0-8 range.
+         * Decrement it by 1 (except TASK_RUNNING state i.e 0) before using
+         * it for left shift operation to get the correct task->state
+         * mapping.
+         *
+         * The result is 0 when the index is 0, otherwise the single bit
+         * 1 << (index - 1).
+         */
+        state = task_state_index(p);
+
+        return state ? (1 << (state - 1)) : state;
+}
+
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(4,14,0))
+
+static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
+{
+        unsigned int state;
+
+#ifdef CONFIG_SCHED_DEBUG
+        BUG_ON(p != current);
+#endif /* CONFIG_SCHED_DEBUG */
+
+        /*
+         * Compute the value recorded in the prev_state field of the
+         * sched_switch event on 4.14 kernels (>= 4.14 and below 4.15, per
+         * the enclosing version checks).
+         *
+         * Preemption ignores task state, therefore preempted tasks are always
+         * RUNNING (we will not have dequeued if state != RUNNING).
+         * A preempted task is reported as TASK_REPORT_MAX instead of going
+         * through the state-index mapping below.
+         */
+        if (preempt)
+                return TASK_REPORT_MAX;
+
+        /*
+         * __get_task_state() uses fls() and returns a value from 0-8 range.
+         * Decrement it by 1 (except TASK_RUNNING state i.e 0) before using
+         * it for left shift operation to get the correct task->state
+         * mapping.
+         *
+         * The result is 0 when the index is 0, otherwise the single bit
+         * 1 << (index - 1).
+         */
+        state = __get_task_state(p);
+
+        return state ? (1 << (state - 1)) : state;
+}
+
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0))
+
+static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
+{
+#ifdef CONFIG_SCHED_DEBUG
+       BUG_ON(p != current);
+#endif /* CONFIG_SCHED_DEBUG */
+       /*
+        * Compute the value recorded in the prev_state field of the
+        * sched_switch event on kernels >= 4.4 and below 4.14 (per the
+        * enclosing version checks).
+        *
+        * Preemption ignores task state, therefore preempted tasks are always RUNNING
+        * (we will not have dequeued if state != RUNNING).
+        * A preempted task is reported as TASK_RUNNING | TASK_STATE_MAX;
+        * otherwise p->state is returned unchanged.
+        */
+       return preempt ? TASK_RUNNING | TASK_STATE_MAX : p->state;
+}
+
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0))
 
 static inline long __trace_sched_switch_state(struct task_struct *p)
 {
@@ -69,7 +149,7 @@ static inline long __trace_sched_switch_state(struct task_struct *p)
        return state;
 }
 
-#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35))
+#else
 
 static inline long __trace_sched_switch_state(struct task_struct *p)
 {
@@ -122,82 +202,117 @@ LTTNG_TRACEPOINT_EVENT(sched_kthread_stop_ret,
 /*
  * Tracepoint for waking up a task:
  */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0) || \
+       LTTNG_RT_KERNEL_RANGE(4,1,10,11, 4,2,0,0) || \
+       LTTNG_RT_KERNEL_RANGE(3,18,27,26, 3,19,0,0) || \
+       LTTNG_RT_KERNEL_RANGE(3,14,61,63, 3,15,0,0) || \
+       LTTNG_RT_KERNEL_RANGE(3,12,54,73, 3,13,0,0) || \
+       LTTNG_RT_KERNEL_RANGE(3,10,97,106, 3,11,0,0) || \
+       LTTNG_RT_KERNEL_RANGE(3,4,110,139, 3,5,0,0) || \
+       LTTNG_RT_KERNEL_RANGE(3,2,77,111, 3,3,0,0))
+LTTNG_TRACEPOINT_EVENT_CLASS(sched_wakeup_template,
+
+       TP_PROTO(struct task_struct *p),
+
+       TP_ARGS(p),
+
+       TP_FIELDS(
+               ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
+               ctf_integer(pid_t, tid, p->pid)
+               ctf_integer(int, prio, p->prio - MAX_RT_PRIO)
+               ctf_integer(int, target_cpu, task_cpu(p))
+       )
+)
+#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0)) */
 LTTNG_TRACEPOINT_EVENT_CLASS(sched_wakeup_template,
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35))
        TP_PROTO(struct task_struct *p, int success),
 
        TP_ARGS(p, success),
-#else
-       TP_PROTO(struct rq *rq, struct task_struct *p, int success),
-
-       TP_ARGS(rq, p, success),
-#endif
 
        TP_FIELDS(
                ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
                ctf_integer(pid_t, tid, p->pid)
-               ctf_integer(int, prio, p->prio)
+               ctf_integer(int, prio, p->prio - MAX_RT_PRIO)
                ctf_integer(int, success, success)
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32))
                ctf_integer(int, target_cpu, task_cpu(p))
-#endif
        )
 )
+#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0)) */
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0) || \
+       LTTNG_RT_KERNEL_RANGE(4,1,10,11, 4,2,0,0) || \
+       LTTNG_RT_KERNEL_RANGE(3,18,27,26, 3,19,0,0) || \
+       LTTNG_RT_KERNEL_RANGE(3,14,61,63, 3,15,0,0) || \
+       LTTNG_RT_KERNEL_RANGE(3,12,54,73, 3,13,0,0) || \
+       LTTNG_RT_KERNEL_RANGE(3,10,97,106, 3,11,0,0) || \
+       LTTNG_RT_KERNEL_RANGE(3,4,110,139, 3,5,0,0) || \
+       LTTNG_RT_KERNEL_RANGE(3,2,77,111, 3,3,0,0))
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35))
+/*
+ * Tracepoint called when waking a task; this tracepoint is guaranteed to be
+ * called from the waking context.
+ */
+LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_waking,
+            TP_PROTO(struct task_struct *p),
+            TP_ARGS(p))
 
+/*
+ * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING.
+ * It is not always called from the waking context.
+ */
 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup,
-            TP_PROTO(struct task_struct *p, int success),
-            TP_ARGS(p, success))
+            TP_PROTO(struct task_struct *p),
+            TP_ARGS(p))
 
 /*
  * Tracepoint for waking up a new task:
  */
 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup_new,
-            TP_PROTO(struct task_struct *p, int success),
-            TP_ARGS(p, success))
+            TP_PROTO(struct task_struct *p),
+            TP_ARGS(p))
 
-#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)) */
+#else
 
 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup,
-            TP_PROTO(struct rq *rq, struct task_struct *p, int success),
-            TP_ARGS(rq, p, success))
+            TP_PROTO(struct task_struct *p, int success),
+            TP_ARGS(p, success))
 
 /*
  * Tracepoint for waking up a new task:
  */
 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup_new,
-            TP_PROTO(struct rq *rq, struct task_struct *p, int success),
-            TP_ARGS(rq, p, success))
+            TP_PROTO(struct task_struct *p, int success),
+            TP_ARGS(p, success))
 
-#endif /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)) */
+#endif /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0)) */
 
 /*
  * Tracepoint for task switches, performed by the scheduler:
  */
 LTTNG_TRACEPOINT_EVENT(sched_switch,
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35))
-       TP_PROTO(struct task_struct *prev,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0))
+       TP_PROTO(bool preempt,
+                struct task_struct *prev,
                 struct task_struct *next),
 
-       TP_ARGS(prev, next),
-#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)) */
-       TP_PROTO(struct rq *rq, struct task_struct *prev,
+       TP_ARGS(preempt, prev, next),
+#else
+       TP_PROTO(struct task_struct *prev,
                 struct task_struct *next),
 
-       TP_ARGS(rq, prev, next),
-#endif /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)) */
+       TP_ARGS(prev, next),
+#endif /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0)) */
 
        TP_FIELDS(
                ctf_array_text(char, prev_comm, prev->comm, TASK_COMM_LEN)
                ctf_integer(pid_t, prev_tid, prev->pid)
                ctf_integer(int, prev_prio, prev->prio - MAX_RT_PRIO)
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35))
-               ctf_integer(long, prev_state, __trace_sched_switch_state(prev))
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,4,0))
+               ctf_integer(long, prev_state, __trace_sched_switch_state(preempt, prev))
 #else
-               ctf_integer(long, prev_state, prev->state)
+               ctf_integer(long, prev_state, __trace_sched_switch_state(prev))
 #endif
                ctf_array_text(char, next_comm, next->comm, TASK_COMM_LEN)
                ctf_integer(pid_t, next_tid, next->pid)
@@ -242,7 +357,7 @@ LTTNG_TRACEPOINT_EVENT_CLASS(sched_process_template,
 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_process_free,
             TP_PROTO(struct task_struct *p),
             TP_ARGS(p))
-            
+
 
 /*
  * Tracepoint for a task exiting:
@@ -254,15 +369,9 @@ LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_process_exit,
 /*
  * Tracepoint for waiting on task to unschedule:
  */
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35))
 LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_wait_task,
        TP_PROTO(struct task_struct *p),
        TP_ARGS(p))
-#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)) */
-LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_process_template, sched_wait_task,
-       TP_PROTO(struct rq *rq, struct task_struct *p),
-       TP_ARGS(rq, p))
-#endif /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)) */
 
 /*
  * Tracepoint for a waiting task:
@@ -288,39 +397,75 @@ LTTNG_TRACEPOINT_EVENT(sched_process_wait,
  * == child_pid, while creation of a thread yields to child_tid !=
  * child_pid.
  */
-LTTNG_TRACEPOINT_EVENT(sched_process_fork,
+LTTNG_TRACEPOINT_EVENT_CODE(sched_process_fork,
 
        TP_PROTO(struct task_struct *parent, struct task_struct *child),
 
        TP_ARGS(parent, child),
 
+       TP_locvar(
+               pid_t vtids[LTTNG_MAX_PID_NS_LEVEL];
+               unsigned int ns_level;
+       ),
+
+       TP_code_pre(
+               if (child) {
+                       struct pid *child_pid;
+                       unsigned int i;
+
+                       child_pid = task_pid(child);
+                       tp_locvar->ns_level =
+                               min_t(unsigned int, child_pid->level + 1,
+                                       LTTNG_MAX_PID_NS_LEVEL);
+                       for (i = 0; i < tp_locvar->ns_level; i++)
+                               tp_locvar->vtids[i] = child_pid->numbers[i].nr;
+               }
+       ),
+
        TP_FIELDS(
                ctf_array_text(char, parent_comm, parent->comm, TASK_COMM_LEN)
                ctf_integer(pid_t, parent_tid, parent->pid)
                ctf_integer(pid_t, parent_pid, parent->tgid)
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0))
+               ctf_integer(unsigned int, parent_ns_inum,
+                       ({
+                               unsigned int parent_ns_inum = 0;
+
+                               if (parent) {
+                                       struct pid_namespace *pid_ns;
+
+                                       pid_ns = task_active_pid_ns(parent);
+                                       if (pid_ns)
+                                               parent_ns_inum =
+                                                       pid_ns->lttng_proc_inum;
+                               }
+                               parent_ns_inum;
+                       }))
+#endif
                ctf_array_text(char, child_comm, child->comm, TASK_COMM_LEN)
                ctf_integer(pid_t, child_tid, child->pid)
+               ctf_sequence(pid_t, vtids, tp_locvar->vtids, u8, tp_locvar->ns_level)
                ctf_integer(pid_t, child_pid, child->tgid)
-       )
-)
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33))
-/*
- * Tracepoint for sending a signal:
- */
-LTTNG_TRACEPOINT_EVENT(sched_signal_send,
-
-       TP_PROTO(int sig, struct task_struct *p),
-
-       TP_ARGS(sig, p),
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0))
+               ctf_integer(unsigned int, child_ns_inum,
+                       ({
+                               unsigned int child_ns_inum = 0;
+
+                               if (child) {
+                                       struct pid_namespace *pid_ns;
+
+                                       pid_ns = task_active_pid_ns(child);
+                                       if (pid_ns)
+                                               child_ns_inum =
+                                                       pid_ns->lttng_proc_inum;
+                               }
+                               child_ns_inum;
+                       }))
+#endif
+       ),
 
-       TP_FIELDS(
-               ctf_integer(int, sig, sig)
-               ctf_array_text(char, comm, p->comm, TASK_COMM_LEN)
-               ctf_integer(pid_t, tid, p->pid)
-       )
+       TP_code_post()
 )
-#endif
 
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,4,0))
 /*
@@ -341,7 +486,6 @@ LTTNG_TRACEPOINT_EVENT(sched_process_exec,
 )
 #endif
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32))
 /*
  * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
  *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
@@ -410,9 +554,28 @@ LTTNG_TRACEPOINT_EVENT(sched_stat_runtime,
                ctf_integer(u64, vruntime, vruntime)
        )
 )
-#endif
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37))
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,12,0) || \
+       LTTNG_RT_KERNEL_RANGE(4,9,27,18, 4,10,0,0) || \
+       LTTNG_RT_KERNEL_RANGE(4,11,5,1, 4,12,0,0))
+/*
+ * Tracepoint for showing priority inheritance modifying a task's
+ * priority.
+ *
+ * Records tsk's comm and tid, tsk's current priority as oldprio, and as
+ * newprio the priority of pi_task when pi_task is non-NULL, falling back
+ * to tsk's own priority otherwise. Both priorities are emitted relative
+ * to MAX_RT_PRIO (prio - MAX_RT_PRIO).
+ */
+LTTNG_TRACEPOINT_EVENT(sched_pi_setprio,
+
+       TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task),
+
+       TP_ARGS(tsk, pi_task),
+
+       TP_FIELDS(
+               ctf_array_text(char, comm, tsk->comm, TASK_COMM_LEN)
+               ctf_integer(pid_t, tid, tsk->pid)
+               ctf_integer(int, oldprio, tsk->prio - MAX_RT_PRIO)
+               ctf_integer(int, newprio, pi_task ? pi_task->prio - MAX_RT_PRIO : tsk->prio - MAX_RT_PRIO)
+       )
+)
+#else
 /*
  * Tracepoint for showing priority inheritance modifying a tasks
  * priority.
@@ -435,4 +598,4 @@ LTTNG_TRACEPOINT_EVENT(sched_pi_setprio,
 #endif /* LTTNG_TRACE_SCHED_H */
 
 /* This part must be outside protection */
-#include "../../../probes/define_trace.h"
+#include <probes/define_trace.h>
This page took 0.028039 seconds and 4 git commands to generate.