Commit | Line | Data |
---|---|---|
b7cdc182 | 1 | /* SPDX-License-Identifier: (GPL-2.0-only or LGPL-2.1-only) |
9f36eaed | 2 | * |
886d51a3 MD |
3 | * lttng-statedump.c |
4 | * | |
c337ddc2 MD |
5 | * Linux Trace Toolkit Next Generation Kernel State Dump |
6 | * | |
7 | * Copyright 2005 Jean-Hugues Deschenes <jean-hugues.deschenes@polymtl.ca> | |
8 | * Copyright 2006-2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> | |
9 | * | |
10 | * Changes: | |
11 | * Eric Clement: Add listing of network IP interface | |
12 | * 2006, 2007 Mathieu Desnoyers Fix kernel threads | |
13 | * Various updates | |
c337ddc2 MD |
14 | */ |
15 | ||
16 | #include <linux/init.h> | |
17 | #include <linux/module.h> | |
18 | #include <linux/netlink.h> | |
19 | #include <linux/inet.h> | |
20 | #include <linux/ip.h> | |
21 | #include <linux/kthread.h> | |
22 | #include <linux/proc_fs.h> | |
23 | #include <linux/file.h> | |
24 | #include <linux/interrupt.h> | |
25 | #include <linux/irqnr.h> | |
c337ddc2 MD |
26 | #include <linux/netdevice.h> |
27 | #include <linux/inetdevice.h> | |
c337ddc2 | 28 | #include <linux/mm.h> |
c337ddc2 MD |
29 | #include <linux/swap.h> |
30 | #include <linux/wait.h> | |
31 | #include <linux/mutex.h> | |
f0dbdefb | 32 | #include <linux/device.h> |
c337ddc2 | 33 | |
d82de6f3 MJ |
34 | #include <linux/blkdev.h> |
35 | ||
2df37e95 MD |
36 | #include <lttng/events.h> |
37 | #include <lttng/tracer.h> | |
05d69af4 | 38 | #include <lttng/events-internal.h> |
8be4c8a3 | 39 | #include <wrapper/cpu.h> |
241ae9a8 | 40 | #include <wrapper/irqdesc.h> |
241ae9a8 | 41 | #include <wrapper/fdtable.h> |
1965e6b4 | 42 | #include <wrapper/namespace.h> |
241ae9a8 MD |
43 | #include <wrapper/irq.h> |
44 | #include <wrapper/tracepoint.h> | |
45 | #include <wrapper/genhd.h> | |
46 | #include <wrapper/file.h> | |
c07dca48 | 47 | #include <wrapper/fdtable.h> |
a9f6bb33 | 48 | #include <wrapper/sched.h> |
c337ddc2 | 49 | |
29784493 | 50 | #ifdef CONFIG_LTTNG_HAS_LIST_IRQ |
c337ddc2 MD |
51 | #include <linux/irq.h> |
52 | #endif | |
53 | ||
54 | /* Define the tracepoints, but do not build the probes */ | |
55 | #define CREATE_TRACE_POINTS | |
4f47ccf0 | 56 | #define TRACE_INCLUDE_PATH instrumentation/events |
c337ddc2 | 57 | #define TRACE_INCLUDE_FILE lttng-statedump |
3bc29f0a | 58 | #define LTTNG_INSTRUMENTATION |
4f47ccf0 | 59 | #include <instrumentation/events/lttng-statedump.h> |
c337ddc2 | 60 | |
0116245f | 61 | LTTNG_DEFINE_TRACE(lttng_statedump_block_device, |
a40e3229 | 62 | TP_PROTO(struct lttng_kernel_session *session, |
0116245f MJ |
63 | dev_t dev, const char *diskname), |
64 | TP_ARGS(session, dev, diskname)); | |
65 | ||
66 | LTTNG_DEFINE_TRACE(lttng_statedump_end, | |
a40e3229 | 67 | TP_PROTO(struct lttng_kernel_session *session), |
0116245f MJ |
68 | TP_ARGS(session)); |
69 | ||
70 | LTTNG_DEFINE_TRACE(lttng_statedump_interrupt, | |
a40e3229 | 71 | TP_PROTO(struct lttng_kernel_session *session, |
0116245f MJ |
72 | unsigned int irq, const char *chip_name, |
73 | struct irqaction *action), | |
74 | TP_ARGS(session, irq, chip_name, action)); | |
75 | ||
76 | LTTNG_DEFINE_TRACE(lttng_statedump_file_descriptor, | |
a40e3229 | 77 | TP_PROTO(struct lttng_kernel_session *session, |
0116245f MJ |
78 | struct files_struct *files, |
79 | int fd, const char *filename, | |
80 | unsigned int flags, fmode_t fmode), | |
81 | TP_ARGS(session, files, fd, filename, flags, fmode)); | |
82 | ||
83 | LTTNG_DEFINE_TRACE(lttng_statedump_start, | |
a40e3229 | 84 | TP_PROTO(struct lttng_kernel_session *session), |
0116245f MJ |
85 | TP_ARGS(session)); |
86 | ||
87 | LTTNG_DEFINE_TRACE(lttng_statedump_process_state, | |
a40e3229 | 88 | TP_PROTO(struct lttng_kernel_session *session, |
0116245f MJ |
89 | struct task_struct *p, |
90 | int type, int mode, int submode, int status, | |
91 | struct files_struct *files), | |
92 | TP_ARGS(session, p, type, mode, submode, status, files)); | |
93 | ||
94 | LTTNG_DEFINE_TRACE(lttng_statedump_process_pid_ns, | |
a40e3229 | 95 | TP_PROTO(struct lttng_kernel_session *session, |
0116245f MJ |
96 | struct task_struct *p, |
97 | struct pid_namespace *pid_ns), | |
98 | TP_ARGS(session, p, pid_ns)); | |
99 | ||
5f4c791e | 100 | #if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,6,0)) |
0116245f | 101 | LTTNG_DEFINE_TRACE(lttng_statedump_process_cgroup_ns, |
a40e3229 | 102 | TP_PROTO(struct lttng_kernel_session *session, |
0116245f MJ |
103 | struct task_struct *p, |
104 | struct cgroup_namespace *cgroup_ns), | |
105 | TP_ARGS(session, p, cgroup_ns)); | |
1965e6b4 | 106 | #endif |
0116245f MJ |
107 | |
108 | LTTNG_DEFINE_TRACE(lttng_statedump_process_ipc_ns, | |
a40e3229 | 109 | TP_PROTO(struct lttng_kernel_session *session, |
0116245f MJ |
110 | struct task_struct *p, |
111 | struct ipc_namespace *ipc_ns), | |
112 | TP_ARGS(session, p, ipc_ns)); | |
113 | ||
1965e6b4 | 114 | #ifndef LTTNG_MNT_NS_MISSING_HEADER |
0116245f | 115 | LTTNG_DEFINE_TRACE(lttng_statedump_process_mnt_ns, |
a40e3229 | 116 | TP_PROTO(struct lttng_kernel_session *session, |
0116245f MJ |
117 | struct task_struct *p, |
118 | struct mnt_namespace *mnt_ns), | |
119 | TP_ARGS(session, p, mnt_ns)); | |
1965e6b4 | 120 | #endif |
0116245f MJ |
121 | |
122 | LTTNG_DEFINE_TRACE(lttng_statedump_process_net_ns, | |
a40e3229 | 123 | TP_PROTO(struct lttng_kernel_session *session, |
0116245f MJ |
124 | struct task_struct *p, |
125 | struct net *net_ns), | |
126 | TP_ARGS(session, p, net_ns)); | |
127 | ||
128 | LTTNG_DEFINE_TRACE(lttng_statedump_process_user_ns, | |
a40e3229 | 129 | TP_PROTO(struct lttng_kernel_session *session, |
0116245f MJ |
130 | struct task_struct *p, |
131 | struct user_namespace *user_ns), | |
132 | TP_ARGS(session, p, user_ns)); | |
133 | ||
134 | LTTNG_DEFINE_TRACE(lttng_statedump_process_uts_ns, | |
a40e3229 | 135 | TP_PROTO(struct lttng_kernel_session *session, |
0116245f MJ |
136 | struct task_struct *p, |
137 | struct uts_namespace *uts_ns), | |
138 | TP_ARGS(session, p, uts_ns)); | |
139 | ||
140 | LTTNG_DEFINE_TRACE(lttng_statedump_process_time_ns, | |
a40e3229 | 141 | TP_PROTO(struct lttng_kernel_session *session, |
0116245f MJ |
142 | struct task_struct *p, |
143 | struct time_namespace *time_ns), | |
144 | TP_ARGS(session, p, time_ns)); | |
145 | ||
146 | LTTNG_DEFINE_TRACE(lttng_statedump_network_interface, | |
a40e3229 | 147 | TP_PROTO(struct lttng_kernel_session *session, |
0116245f MJ |
148 | struct net_device *dev, struct in_ifaddr *ifa), |
149 | TP_ARGS(session, dev, ifa)); | |
150 | ||
d0b55e4c | 151 | #ifdef LTTNG_HAVE_STATEDUMP_CPU_TOPOLOGY |
0116245f | 152 | LTTNG_DEFINE_TRACE(lttng_statedump_cpu_topology, |
a40e3229 | 153 | TP_PROTO(struct lttng_kernel_session *session, struct cpuinfo_x86 *c), |
0116245f | 154 | TP_ARGS(session, c)); |
502e4132 | 155 | #endif |
20591cf7 | 156 | |
361c023a MD |
/*
 * Context passed to lttng_dump_one_fd() for every file descriptor
 * visited while walking a files_struct.
 */
struct lttng_fd_ctx {
	/* Scratch buffer of PAGE_SIZE bytes handed to d_path(). */
	char *page;
	/* Session the file descriptor events are emitted into. */
	struct lttng_kernel_session *session;
	/* The fd table currently being enumerated. */
	struct files_struct *files;
};
162 | ||
c337ddc2 MD |
163 | /* |
164 | * Protected by the trace lock. | |
165 | */ | |
166 | static struct delayed_work cpu_work[NR_CPUS]; | |
167 | static DECLARE_WAIT_QUEUE_HEAD(statedump_wq); | |
168 | static atomic_t kernel_threads_to_run; | |
169 | ||
/* Thread type reported in the process_state event. */
enum lttng_thread_type {
	/* Task has an mm. */
	LTTNG_USER_THREAD = 0,
	/* Task has no mm. */
	LTTNG_KERNEL_THREAD = 1,
};
174 | ||
/*
 * Execution mode reported in the process_state event. The statedump
 * itself always reports LTTNG_MODE_UNKNOWN.
 */
enum lttng_execution_mode {
	LTTNG_USER_MODE = 0,
	LTTNG_SYSCALL = 1,
	LTTNG_TRAP = 2,
	LTTNG_IRQ = 3,
	LTTNG_SOFTIRQ = 4,
	LTTNG_MODE_UNKNOWN = 5,
};
183 | ||
/*
 * Execution submode reported in the process_state event. The statedump
 * itself always reports LTTNG_NONE.
 */
enum lttng_execution_submode {
	LTTNG_NONE = 0,
	LTTNG_UNKNOWN = 1,
};
188 | ||
/* Process status reported in the process_state event. */
enum lttng_process_status {
	/* None of the other states matched. */
	LTTNG_UNNAMED = 0,
	/* Running task whose rt run list entry is empty. */
	LTTNG_WAIT_FORK = 1,
	/* Running task; the viewer decides whether it really was on a CPU. */
	LTTNG_WAIT_CPU = 2,
	LTTNG_EXIT = 3,
	/* exit_state is EXIT_ZOMBIE. */
	LTTNG_ZOMBIE = 4,
	/* Task is in interruptible or uninterruptible sleep. */
	LTTNG_WAIT = 5,
	LTTNG_RUN = 6,
	/* exit_state is EXIT_DEAD. */
	LTTNG_DEAD = 7,
};
199 | ||
fef7fe6a | 200 | |
f6800492 MH |
201 | #if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(6,0,0) || \ |
202 | LTTNG_RHEL_KERNEL_RANGE(5,14,0,163,0,0, 5,15,0,0,0,0)) | |
8c2e17ee MJ |
203 | |
204 | #define LTTNG_PART_STRUCT_TYPE struct block_device | |
205 | ||
206 | static | |
207 | int lttng_get_part_name(struct gendisk *disk, struct block_device *part, char *name_buf) | |
208 | { | |
209 | int ret; | |
210 | ||
211 | ret = snprintf(name_buf, BDEVNAME_SIZE, "%pg", part); | |
212 | if (ret < 0 || ret >= BDEVNAME_SIZE) | |
213 | return -ENOSYS; | |
214 | ||
215 | return 0; | |
216 | } | |
217 | ||
218 | static | |
219 | dev_t lttng_get_part_devt(struct block_device *part) | |
220 | { | |
221 | return part->bd_dev; | |
222 | } | |
223 | ||
224 | #elif (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(5,11,0)) | |
fef7fe6a MJ |
225 | |
226 | #define LTTNG_PART_STRUCT_TYPE struct block_device | |
227 | ||
228 | static | |
229 | int lttng_get_part_name(struct gendisk *disk, struct block_device *part, char *name_buf) | |
230 | { | |
231 | const char *p; | |
232 | ||
233 | p = bdevname(part, name_buf); | |
234 | if (!p) | |
235 | return -ENOSYS; | |
236 | ||
237 | return 0; | |
238 | } | |
239 | ||
240 | static | |
241 | dev_t lttng_get_part_devt(struct block_device *part) | |
242 | { | |
243 | return part->bd_dev; | |
244 | } | |
245 | ||
246 | #else | |
247 | ||
248 | #define LTTNG_PART_STRUCT_TYPE struct hd_struct | |
249 | ||
250 | static | |
251 | int lttng_get_part_name(struct gendisk *disk, struct hd_struct *part, char *name_buf) | |
252 | { | |
253 | const char *p; | |
254 | struct block_device bdev; | |
255 | ||
256 | /* | |
257 | * Create a partial 'struct blockdevice' to use | |
258 | * 'bdevname()' which is a simple wrapper over | |
259 | * 'disk_name()' but has the honor to be EXPORT_SYMBOL. | |
260 | */ | |
261 | bdev.bd_disk = disk; | |
262 | bdev.bd_part = part; | |
263 | ||
264 | p = bdevname(&bdev, name_buf); | |
265 | if (!p) | |
266 | return -ENOSYS; | |
267 | ||
268 | return 0; | |
269 | } | |
270 | ||
271 | static | |
272 | dev_t lttng_get_part_devt(struct hd_struct *part) | |
273 | { | |
274 | return part_devt(part); | |
275 | } | |
276 | #endif | |
277 | ||
de874bde MJ |
278 | #if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(5,12,0)) |
279 | static | |
280 | int lttng_statedump_each_block_device(struct lttng_kernel_session *session, struct gendisk *disk) | |
281 | { | |
282 | struct block_device *part; | |
283 | unsigned long idx; | |
284 | int ret = 0; | |
285 | ||
286 | /* Include partition 0 */ | |
287 | idx = 0; | |
288 | ||
289 | rcu_read_lock(); | |
290 | xa_for_each(&disk->part_tbl, idx, part) { | |
291 | char name_buf[BDEVNAME_SIZE]; | |
292 | ||
293 | /* Exclude non-partitions bdev and empty partitions. */ | |
294 | if (bdev_is_partition(part) && !bdev_nr_sectors(part)) | |
295 | continue; | |
296 | ||
297 | if (lttng_get_part_name(disk, part, name_buf) == -ENOSYS) { | |
298 | ret = -ENOSYS; | |
299 | goto end; | |
300 | } | |
301 | trace_lttng_statedump_block_device(session, lttng_get_part_devt(part), | |
302 | name_buf); | |
303 | } | |
304 | end: | |
305 | rcu_read_unlock(); | |
306 | return ret; | |
307 | } | |
308 | #else | |
309 | static | |
310 | int lttng_statedump_each_block_device(struct lttng_kernel_session *session, struct gendisk *disk) | |
311 | { | |
312 | struct disk_part_iter piter; | |
313 | LTTNG_PART_STRUCT_TYPE *part; | |
314 | ||
315 | disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); | |
316 | ||
317 | while ((part = disk_part_iter_next(&piter))) { | |
318 | char name_buf[BDEVNAME_SIZE]; | |
319 | ||
320 | if (lttng_get_part_name(disk, part, name_buf) == -ENOSYS) { | |
321 | disk_part_iter_exit(&piter); | |
322 | return -ENOSYS; | |
323 | } | |
324 | trace_lttng_statedump_block_device(session, lttng_get_part_devt(part), | |
325 | name_buf); | |
326 | } | |
327 | disk_part_iter_exit(&piter); | |
328 | ||
329 | return 0; | |
330 | } | |
331 | #endif | |
332 | ||
f0dbdefb | 333 | static |
a40e3229 | 334 | int lttng_enumerate_block_devices(struct lttng_kernel_session *session) |
f0dbdefb HD |
335 | { |
336 | struct class *ptr_block_class; | |
337 | struct device_type *ptr_disk_type; | |
338 | struct class_dev_iter iter; | |
339 | struct device *dev; | |
7633c773 | 340 | int ret = 0; |
f0dbdefb HD |
341 | |
342 | ptr_block_class = wrapper_get_block_class(); | |
7633c773 MJ |
343 | if (!ptr_block_class) { |
344 | ret = -ENOSYS; | |
345 | goto end; | |
346 | } | |
f0dbdefb HD |
347 | ptr_disk_type = wrapper_get_disk_type(); |
348 | if (!ptr_disk_type) { | |
7633c773 MJ |
349 | ret = -ENOSYS; |
350 | goto end; | |
f0dbdefb HD |
351 | } |
352 | class_dev_iter_init(&iter, ptr_block_class, NULL, ptr_disk_type); | |
353 | while ((dev = class_dev_iter_next(&iter))) { | |
f0dbdefb | 354 | struct gendisk *disk = dev_to_disk(dev); |
f0dbdefb | 355 | |
5a91f3df MD |
356 | /* |
357 | * Don't show empty devices or things that have been | |
358 | * suppressed | |
359 | */ | |
360 | if (get_capacity(disk) == 0 || | |
a88ee460 | 361 | (disk->flags & LTTNG_GENHD_FL_HIDDEN)) |
5a91f3df MD |
362 | continue; |
363 | ||
de874bde | 364 | ret = lttng_statedump_each_block_device(session, disk); |
f0dbdefb HD |
365 | } |
366 | class_dev_iter_exit(&iter); | |
7633c773 MJ |
367 | end: |
368 | return ret; | |
f0dbdefb HD |
369 | } |
370 | ||
c337ddc2 | 371 | #ifdef CONFIG_INET |
f0dbdefb | 372 | |
c337ddc2 | 373 | static |
a40e3229 | 374 | void lttng_enumerate_device(struct lttng_kernel_session *session, |
c337ddc2 MD |
375 | struct net_device *dev) |
376 | { | |
377 | struct in_device *in_dev; | |
378 | struct in_ifaddr *ifa; | |
379 | ||
380 | if (dev->flags & IFF_UP) { | |
381 | in_dev = in_dev_get(dev); | |
382 | if (in_dev) { | |
383 | for (ifa = in_dev->ifa_list; ifa != NULL; | |
384 | ifa = ifa->ifa_next) { | |
385 | trace_lttng_statedump_network_interface( | |
386 | session, dev, ifa); | |
387 | } | |
388 | in_dev_put(in_dev); | |
389 | } | |
390 | } else { | |
391 | trace_lttng_statedump_network_interface( | |
392 | session, dev, NULL); | |
393 | } | |
394 | } | |
395 | ||
52eb2ee9 KS |
396 | #if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(6,9,0)) |
397 | static | |
398 | int lttng_enumerate_network_ip_interface(struct lttng_kernel_session *session) | |
399 | { | |
400 | struct net_device *dev; | |
401 | ||
402 | rtnl_lock(); | |
403 | for_each_netdev(&init_net, dev) | |
404 | lttng_enumerate_device(session, dev); | |
405 | rtnl_unlock(); | |
406 | ||
407 | return 0; | |
408 | } | |
409 | #else | |
c337ddc2 | 410 | static |
a40e3229 | 411 | int lttng_enumerate_network_ip_interface(struct lttng_kernel_session *session) |
c337ddc2 MD |
412 | { |
413 | struct net_device *dev; | |
414 | ||
415 | read_lock(&dev_base_lock); | |
416 | for_each_netdev(&init_net, dev) | |
417 | lttng_enumerate_device(session, dev); | |
418 | read_unlock(&dev_base_lock); | |
419 | ||
420 | return 0; | |
421 | } | |
52eb2ee9 | 422 | #endif /* (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(6,9,0)) */ |
c337ddc2 MD |
423 | #else /* CONFIG_INET */ |
/* Without CONFIG_INET there is nothing to enumerate: report success. */
static inline
int lttng_enumerate_network_ip_interface(struct lttng_kernel_session *session)
{
	return 0;
}
429 | #endif /* CONFIG_INET */ | |
430 | ||
361c023a MD |
431 | static |
432 | int lttng_dump_one_fd(const void *p, struct file *file, unsigned int fd) | |
433 | { | |
434 | const struct lttng_fd_ctx *ctx = p; | |
435 | const char *s = d_path(&file->f_path, ctx->page, PAGE_SIZE); | |
29021503 | 436 | unsigned int flags = file->f_flags; |
d561ecfb | 437 | struct fdtable *fdt; |
361c023a | 438 | |
29021503 MD |
439 | /* |
440 | * We don't expose kernel internal flags, only userspace-visible | |
441 | * flags. | |
442 | */ | |
443 | flags &= ~FMODE_NONOTIFY; | |
d561ecfb | 444 | fdt = files_fdtable(ctx->files); |
3801d5d2 | 445 | |
d561ecfb | 446 | /* |
3801d5d2 MJ |
447 | * The fdt should only grow and iterate_fd() holds file_lock, which |
448 | * should ensure the fdt does not change while the lock is taken but be | |
449 | * cautious and check anyway. | |
d561ecfb | 450 | */ |
3801d5d2 MJ |
451 | if (WARN_ON_ONCE(fd >= fdt->max_fds)) |
452 | return 0; | |
453 | ||
454 | if (lttng_close_on_exec(fd, ctx->files)) | |
29021503 | 455 | flags |= O_CLOEXEC; |
3801d5d2 MJ |
456 | |
457 | /* | |
458 | * If d_path() failed to get a full path for the file, use the dentry | |
459 | * name instead to at least get a filename. | |
460 | */ | |
361c023a MD |
461 | if (IS_ERR(s)) { |
462 | struct dentry *dentry = file->f_path.dentry; | |
463 | ||
464 | /* Make sure we give at least some info */ | |
465 | spin_lock(&dentry->d_lock); | |
e7a0ca72 MD |
466 | trace_lttng_statedump_file_descriptor(ctx->session, |
467 | ctx->files, fd, dentry->d_name.name, flags, | |
468 | file->f_mode); | |
361c023a MD |
469 | spin_unlock(&dentry->d_lock); |
470 | goto end; | |
471 | } | |
e7a0ca72 MD |
472 | trace_lttng_statedump_file_descriptor(ctx->session, |
473 | ctx->files, fd, s, flags, file->f_mode); | |
361c023a MD |
474 | end: |
475 | return 0; | |
476 | } | |
c337ddc2 | 477 | |
e7a0ca72 | 478 | /* Called with task lock held. */ |
c337ddc2 | 479 | static |
a40e3229 | 480 | void lttng_enumerate_files(struct lttng_kernel_session *session, |
e7a0ca72 MD |
481 | struct files_struct *files, |
482 | char *tmp) | |
c337ddc2 | 483 | { |
e7a0ca72 | 484 | struct lttng_fd_ctx ctx = { .page = tmp, .session = session, .files = files, }; |
c337ddc2 | 485 | |
d561ecfb | 486 | lttng_iterate_fd(files, 0, lttng_dump_one_fd, &ctx); |
c337ddc2 MD |
487 | } |
488 | ||
d0b55e4c | 489 | #ifdef LTTNG_HAVE_STATEDUMP_CPU_TOPOLOGY |
502e4132 | 490 | static |
a40e3229 | 491 | int lttng_enumerate_cpu_topology(struct lttng_kernel_session *session) |
502e4132 JD |
492 | { |
493 | int cpu; | |
494 | const cpumask_t *cpumask = cpu_possible_mask; | |
495 | ||
496 | for (cpu = cpumask_first(cpumask); cpu < nr_cpu_ids; | |
497 | cpu = cpumask_next(cpu, cpumask)) { | |
498 | trace_lttng_statedump_cpu_topology(session, &cpu_data(cpu)); | |
499 | } | |
500 | ||
501 | return 0; | |
502 | } | |
503 | #else | |
/* CPU topology statedump is not available in this build: report success. */
static
int lttng_enumerate_cpu_topology(struct lttng_kernel_session *session)
{
	return 0;
}
509 | #endif | |
510 | ||
0658bdda MD |
511 | #if 0 |
512 | /* | |
513 | * FIXME: we cannot take a mmap_sem while in a RCU read-side critical section | |
514 | * (scheduling in atomic). Normally, the tasklist lock protects this kind of | |
515 | * iteration, but it is not exported to modules. | |
516 | */ | |
c337ddc2 | 517 | static |
a40e3229 | 518 | void lttng_enumerate_task_vm_maps(struct lttng_kernel_session *session, |
c337ddc2 MD |
519 | struct task_struct *p) |
520 | { | |
521 | struct mm_struct *mm; | |
522 | struct vm_area_struct *map; | |
523 | unsigned long ino; | |
524 | ||
525 | /* get_task_mm does a task_lock... */ | |
526 | mm = get_task_mm(p); | |
527 | if (!mm) | |
528 | return; | |
529 | ||
530 | map = mm->mmap; | |
531 | if (map) { | |
532 | down_read(&mm->mmap_sem); | |
533 | while (map) { | |
534 | if (map->vm_file) | |
b06ed645 | 535 | ino = map->vm_file->lttng_f_dentry->d_inode->i_ino; |
c337ddc2 MD |
536 | else |
537 | ino = 0; | |
538 | trace_lttng_statedump_vm_map(session, p, map, ino); | |
539 | map = map->vm_next; | |
540 | } | |
541 | up_read(&mm->mmap_sem); | |
542 | } | |
543 | mmput(mm); | |
544 | } | |
545 | ||
546 | static | |
a40e3229 | 547 | int lttng_enumerate_vm_maps(struct lttng_kernel_session *session) |
c337ddc2 MD |
548 | { |
549 | struct task_struct *p; | |
550 | ||
551 | rcu_read_lock(); | |
552 | for_each_process(p) | |
553 | lttng_enumerate_task_vm_maps(session, p); | |
554 | rcu_read_unlock(); | |
555 | return 0; | |
556 | } | |
0658bdda | 557 | #endif |
c337ddc2 | 558 | |
29784493 | 559 | #ifdef CONFIG_LTTNG_HAS_LIST_IRQ |
47faec4b | 560 | |
c337ddc2 | 561 | static |
a40e3229 | 562 | int lttng_list_interrupts(struct lttng_kernel_session *session) |
c337ddc2 MD |
563 | { |
564 | unsigned int irq; | |
565 | unsigned long flags = 0; | |
566 | struct irq_desc *desc; | |
567 | ||
568 | #define irq_to_desc wrapper_irq_to_desc | |
569 | /* needs irq_desc */ | |
570 | for_each_irq_desc(irq, desc) { | |
571 | struct irqaction *action; | |
572 | const char *irq_chip_name = | |
573 | irq_desc_get_chip(desc)->name ? : "unnamed_irq_chip"; | |
574 | ||
575 | local_irq_save(flags); | |
fc94c945 | 576 | raw_spin_lock(&desc->lock); |
c337ddc2 MD |
577 | for (action = desc->action; action; action = action->next) { |
578 | trace_lttng_statedump_interrupt(session, | |
579 | irq, irq_chip_name, action); | |
580 | } | |
fc94c945 | 581 | raw_spin_unlock(&desc->lock); |
c337ddc2 MD |
582 | local_irq_restore(flags); |
583 | } | |
cfcee1c7 | 584 | return 0; |
c337ddc2 MD |
585 | #undef irq_to_desc |
586 | } | |
587 | #else | |
/* Interrupt listing is not available in this configuration: report success. */
static inline
int lttng_list_interrupts(struct lttng_kernel_session *session)
{
	return 0;
}
593 | #endif | |
594 | ||
4ba1f53c | 595 | /* |
1965e6b4 MJ |
596 | * Statedump the task's namespaces using the proc filesystem inode number as |
597 | * the unique identifier. The user and pid ns are nested and will be dumped | |
598 | * recursively. | |
599 | * | |
4ba1f53c MD |
600 | * Called with task lock held. |
601 | */ | |
73e8ba37 | 602 | static |
a40e3229 | 603 | void lttng_statedump_process_ns(struct lttng_kernel_session *session, |
73e8ba37 JD |
604 | struct task_struct *p, |
605 | enum lttng_thread_type type, | |
606 | enum lttng_execution_mode mode, | |
607 | enum lttng_execution_submode submode, | |
608 | enum lttng_process_status status) | |
609 | { | |
1965e6b4 | 610 | struct nsproxy *proxy; |
73e8ba37 | 611 | struct pid_namespace *pid_ns; |
1965e6b4 | 612 | struct user_namespace *user_ns; |
73e8ba37 | 613 | |
1965e6b4 MJ |
614 | /* |
615 | * The pid and user namespaces are special, they are nested and | |
616 | * accessed with specific functions instead of the nsproxy struct | |
617 | * like the other namespaces. | |
618 | */ | |
887bcdac MJ |
619 | pid_ns = task_active_pid_ns(p); |
620 | do { | |
1965e6b4 | 621 | trace_lttng_statedump_process_pid_ns(session, p, pid_ns); |
adcc8b5e | 622 | pid_ns = pid_ns ? pid_ns->parent : NULL; |
887bcdac | 623 | } while (pid_ns); |
1965e6b4 MJ |
624 | |
625 | ||
626 | user_ns = task_cred_xxx(p, user_ns); | |
627 | do { | |
628 | trace_lttng_statedump_process_user_ns(session, p, user_ns); | |
1964cccb MD |
629 | /* |
630 | * trace_lttng_statedump_process_user_ns() internally | |
631 | * checks whether user_ns is NULL. While this does not | |
632 | * appear to be a possible return value for | |
633 | * task_cred_xxx(), err on the safe side and check | |
634 | * for NULL here as well to be consistent with the | |
635 | * paranoid behavior of | |
636 | * trace_lttng_statedump_process_user_ns(). | |
637 | */ | |
638 | user_ns = user_ns ? user_ns->lttng_user_ns_parent : NULL; | |
1965e6b4 MJ |
639 | } while (user_ns); |
640 | ||
641 | /* | |
642 | * Back and forth on locking strategy within Linux upstream for nsproxy. | |
643 | * See Linux upstream commit 728dba3a39c66b3d8ac889ddbe38b5b1c264aec3 | |
644 | * "namespaces: Use task_lock and not rcu to protect nsproxy" | |
645 | * for details. | |
646 | */ | |
5f4c791e | 647 | #if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(3,17,0) || \ |
1965e6b4 MJ |
648 | LTTNG_UBUNTU_KERNEL_RANGE(3,13,11,36, 3,14,0,0) || \ |
649 | LTTNG_UBUNTU_KERNEL_RANGE(3,16,1,11, 3,17,0,0) || \ | |
650 | LTTNG_RHEL_KERNEL_RANGE(3,10,0,229,13,0, 3,11,0,0,0,0)) | |
651 | proxy = p->nsproxy; | |
652 | #else | |
653 | rcu_read_lock(); | |
654 | proxy = task_nsproxy(p); | |
655 | #endif | |
656 | if (proxy) { | |
5f4c791e | 657 | #if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,6,0)) |
1965e6b4 MJ |
658 | trace_lttng_statedump_process_cgroup_ns(session, p, proxy->cgroup_ns); |
659 | #endif | |
660 | trace_lttng_statedump_process_ipc_ns(session, p, proxy->ipc_ns); | |
661 | #ifndef LTTNG_MNT_NS_MISSING_HEADER | |
662 | trace_lttng_statedump_process_mnt_ns(session, p, proxy->mnt_ns); | |
663 | #endif | |
664 | trace_lttng_statedump_process_net_ns(session, p, proxy->net_ns); | |
665 | trace_lttng_statedump_process_uts_ns(session, p, proxy->uts_ns); | |
f3a2b1ec MJ |
666 | #if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(5,6,0) || \ |
667 | LTTNG_RHEL_KERNEL_RANGE(4,18,0,305,0,0, 4,19,0,0,0,0)) | |
876e2e92 MJ |
668 | trace_lttng_statedump_process_time_ns(session, p, proxy->time_ns); |
669 | #endif | |
1965e6b4 | 670 | } |
5f4c791e | 671 | #if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(3,17,0) || \ |
1965e6b4 MJ |
672 | LTTNG_UBUNTU_KERNEL_RANGE(3,13,11,36, 3,14,0,0) || \ |
673 | LTTNG_UBUNTU_KERNEL_RANGE(3,16,1,11, 3,17,0,0) || \ | |
674 | LTTNG_RHEL_KERNEL_RANGE(3,10,0,229,13,0, 3,11,0,0,0,0)) | |
675 | /* (nothing) */ | |
676 | #else | |
677 | rcu_read_unlock(); | |
678 | #endif | |
73e8ba37 JD |
679 | } |
680 | ||
c337ddc2 | 681 | static |
a40e3229 | 682 | int lttng_enumerate_process_states(struct lttng_kernel_session *session) |
c337ddc2 MD |
683 | { |
684 | struct task_struct *g, *p; | |
e7a0ca72 MD |
685 | char *tmp; |
686 | ||
687 | tmp = (char *) __get_free_page(GFP_KERNEL); | |
688 | if (!tmp) | |
689 | return -ENOMEM; | |
c337ddc2 MD |
690 | |
691 | rcu_read_lock(); | |
692 | for_each_process(g) { | |
e7a0ca72 MD |
693 | struct files_struct *prev_files = NULL; |
694 | ||
c337ddc2 MD |
695 | p = g; |
696 | do { | |
697 | enum lttng_execution_mode mode = | |
698 | LTTNG_MODE_UNKNOWN; | |
699 | enum lttng_execution_submode submode = | |
700 | LTTNG_UNKNOWN; | |
701 | enum lttng_process_status status; | |
702 | enum lttng_thread_type type; | |
e7a0ca72 | 703 | struct files_struct *files; |
c337ddc2 MD |
704 | |
705 | task_lock(p); | |
706 | if (p->exit_state == EXIT_ZOMBIE) | |
707 | status = LTTNG_ZOMBIE; | |
708 | else if (p->exit_state == EXIT_DEAD) | |
709 | status = LTTNG_DEAD; | |
a9f6bb33 | 710 | else if (lttng_task_is_running(p)) { |
c337ddc2 MD |
711 | /* Is this a forked child that has not run yet? */ |
712 | if (list_empty(&p->rt.run_list)) | |
713 | status = LTTNG_WAIT_FORK; | |
714 | else | |
715 | /* | |
716 | * All tasks are considered as wait_cpu; | |
717 | * the viewer will sort out if the task | |
718 | * was really running at this time. | |
719 | */ | |
720 | status = LTTNG_WAIT_CPU; | |
a9f6bb33 | 721 | } else if (lttng_get_task_state(p) & |
c337ddc2 MD |
722 | (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)) { |
723 | /* Task is waiting for something to complete */ | |
724 | status = LTTNG_WAIT; | |
725 | } else | |
726 | status = LTTNG_UNNAMED; | |
727 | submode = LTTNG_NONE; | |
728 | ||
729 | /* | |
730 | * Verification of t->mm is to filter out kernel | |
731 | * threads; Viewer will further filter out if a | |
732 | * user-space thread was in syscall mode or not. | |
733 | */ | |
734 | if (p->mm) | |
735 | type = LTTNG_USER_THREAD; | |
736 | else | |
737 | type = LTTNG_KERNEL_THREAD; | |
e7a0ca72 | 738 | files = p->files; |
d2a927ac MJ |
739 | |
740 | trace_lttng_statedump_process_state(session, | |
e7a0ca72 | 741 | p, type, mode, submode, status, files); |
73e8ba37 | 742 | lttng_statedump_process_ns(session, |
c337ddc2 | 743 | p, type, mode, submode, status); |
e7a0ca72 MD |
744 | /* |
745 | * As an optimisation for the common case, do not | |
746 | * repeat information for the same files_struct in | |
747 | * two consecutive threads. This is the common case | |
748 | * for threads sharing the same fd table. RCU guarantees | |
749 | * that the same files_struct pointer is not re-used | |
750 | * throughout processes/threads iteration. | |
751 | */ | |
752 | if (files && files != prev_files) { | |
753 | lttng_enumerate_files(session, files, tmp); | |
754 | prev_files = files; | |
755 | } | |
c337ddc2 MD |
756 | task_unlock(p); |
757 | } while_each_thread(g, p); | |
758 | } | |
759 | rcu_read_unlock(); | |
760 | ||
e7a0ca72 MD |
761 | free_page((unsigned long) tmp); |
762 | ||
c337ddc2 MD |
763 | return 0; |
764 | } | |
765 | ||
766 | static | |
767 | void lttng_statedump_work_func(struct work_struct *work) | |
768 | { | |
769 | if (atomic_dec_and_test(&kernel_threads_to_run)) | |
770 | /* If we are the last thread, wake up do_lttng_statedump */ | |
771 | wake_up(&statedump_wq); | |
772 | } | |
773 | ||
774 | static | |
a40e3229 | 775 | int do_lttng_statedump(struct lttng_kernel_session *session) |
c337ddc2 | 776 | { |
cfcee1c7 | 777 | int cpu, ret; |
c337ddc2 | 778 | |
c337ddc2 | 779 | trace_lttng_statedump_start(session); |
cfcee1c7 | 780 | ret = lttng_enumerate_process_states(session); |
cfcee1c7 MD |
781 | if (ret) |
782 | return ret; | |
783 | /* | |
784 | * FIXME | |
785 | * ret = lttng_enumerate_vm_maps(session); | |
786 | * if (ret) | |
787 | * return ret; | |
788 | */ | |
789 | ret = lttng_list_interrupts(session); | |
790 | if (ret) | |
791 | return ret; | |
792 | ret = lttng_enumerate_network_ip_interface(session); | |
793 | if (ret) | |
794 | return ret; | |
795 | ret = lttng_enumerate_block_devices(session); | |
796 | switch (ret) { | |
84c7055e MD |
797 | case 0: |
798 | break; | |
cfcee1c7 MD |
799 | case -ENOSYS: |
800 | printk(KERN_WARNING "LTTng: block device enumeration is not supported by kernel\n"); | |
801 | break; | |
802 | default: | |
803 | return ret; | |
804 | } | |
502e4132 JD |
805 | ret = lttng_enumerate_cpu_topology(session); |
806 | if (ret) | |
807 | return ret; | |
c337ddc2 MD |
808 | |
809 | /* TODO lttng_dump_idt_table(session); */ | |
810 | /* TODO lttng_dump_softirq_vec(session); */ | |
811 | /* TODO lttng_list_modules(session); */ | |
812 | /* TODO lttng_dump_swap_files(session); */ | |
813 | ||
814 | /* | |
815 | * Fire off a work queue on each CPU. Their sole purpose in life | |
816 | * is to guarantee that each CPU has been in a state where is was in | |
817 | * syscall mode (i.e. not in a trap, an IRQ or a soft IRQ). | |
818 | */ | |
8be4c8a3 | 819 | lttng_cpus_read_lock(); |
c337ddc2 MD |
820 | atomic_set(&kernel_threads_to_run, num_online_cpus()); |
821 | for_each_online_cpu(cpu) { | |
822 | INIT_DELAYED_WORK(&cpu_work[cpu], lttng_statedump_work_func); | |
823 | schedule_delayed_work_on(cpu, &cpu_work[cpu], 0); | |
824 | } | |
825 | /* Wait for all threads to run */ | |
7a7128e0 | 826 | __wait_event(statedump_wq, (atomic_read(&kernel_threads_to_run) == 0)); |
8be4c8a3 | 827 | lttng_cpus_read_unlock(); |
c337ddc2 | 828 | /* Our work is done */ |
c337ddc2 MD |
829 | trace_lttng_statedump_end(session); |
830 | return 0; | |
831 | } | |
832 | ||
/*
 * Exported entry point: run a statedump for @session and return the
 * result of do_lttng_statedump().
 *
 * Called with session mutex held.
 */
int lttng_statedump_start(struct lttng_kernel_session *session)
{
	int ret = do_lttng_statedump(session);

	return ret;
}
EXPORT_SYMBOL_GPL(lttng_statedump_start);
841 | ||
dd8d5afb MD |
842 | static |
843 | int __init lttng_statedump_init(void) | |
844 | { | |
d16aa9c9 MD |
845 | /* |
846 | * Allow module to load even if the fixup cannot be done. This | |
847 | * will allow seemless transition when the underlying issue fix | |
848 | * is merged into the Linux kernel, and when tracepoint.c | |
849 | * "tracepoint_module_notify" is turned into a static function. | |
850 | */ | |
851 | (void) wrapper_lttng_fixup_sig(THIS_MODULE); | |
852 | return 0; | |
dd8d5afb MD |
853 | } |
854 | ||
855 | module_init(lttng_statedump_init); | |
856 | ||
461277e7 MD |
857 | static |
858 | void __exit lttng_statedump_exit(void) | |
859 | { | |
860 | } | |
861 | ||
862 | module_exit(lttng_statedump_exit); | |
863 | ||
c337ddc2 MD |
864 | MODULE_LICENSE("GPL and additional rights"); |
865 | MODULE_AUTHOR("Jean-Hugues Deschenes"); | |
1c124020 | 866 | MODULE_DESCRIPTION("LTTng statedump provider"); |
13ab8b0a MD |
867 | MODULE_VERSION(__stringify(LTTNG_MODULES_MAJOR_VERSION) "." |
868 | __stringify(LTTNG_MODULES_MINOR_VERSION) "." | |
869 | __stringify(LTTNG_MODULES_PATCHLEVEL_VERSION) | |
870 | LTTNG_MODULES_EXTRAVERSION); |