Tests: Add test to check shared-memory FD leaks after relayd dies
[lttng-tools.git] / tests / regression / tools / live / test_per_application_leaks.py
CommitLineData
9a28bc04
KS
1#!/usr/bin/env python3
2#
3# SPDX-FileCopyrightText: Kienan Stewart <kstewart@efficios.com>
4# SPDX-License-Identifier: GPL-2.0-only
5
6"""
7Test that the consumerd doesn't leak file descriptor allocations in /dev/shm
8when the relayd exits before instrumented applications start.
9
10@see https://bugs.lttng.org/issues/1411
11"""
12
13import os
14import pathlib
15import subprocess
16import sys
17
18test_utils_import_path = pathlib.Path(__file__).absolute().parents[3] / "utils"
19sys.path.append(str(test_utils_import_path))
20
21import lttngtest
22
23
def get_consumerd_pid(tap, parent, match_string):
    """
    Return the PID of the single child process of `parent` that `pgrep`
    reports for `match_string`, or 0 when it cannot be determined.

    `pgrep -P <parent> -f <match_string>` is run and exactly one line of
    output is expected. Any failure (pgrep could not be run, zero or several
    matches, unparsable output) is reported through `tap.diagnostic()` and
    results in a return value of 0 rather than an exception, so callers can
    treat 0 as "no such consumerd".
    """
    pid = 0
    try:
        # communicate() drains stdout while waiting for pgrep to exit, which
        # avoids the wait()-before-read deadlock on a full pipe; the context
        # manager closes the pipe once we are done with it.
        with subprocess.Popen(
            ["pgrep", "-P", str(parent), "-f", match_string],
            stdout=subprocess.PIPE,
        ) as process:
            stdout, _ = process.communicate()

        output = str(stdout, encoding="UTF-8").splitlines()
        if len(output) != 1:
            raise Exception(
                "Unexpected number of output lines (got {}): {}".format(
                    len(output), output
                )
            )
        pid = int(output[0])
    except Exception as e:
        # Deliberately best-effort: a missing consumerd is not fatal for the
        # caller, so only log the reason and fall through to return 0.
        tap.diagnostic(
            "Failed to find child process of '{}' matching '{}': '{}'".format(
                parent, match_string, str(e)
            )
        )
    return pid
47
48
def count_process_dev_shm_fds(pid):
    """
    Count the entries under /proc/<pid>/fd that are symbolic links resolving
    to a path starting with "/dev/shm/shm-ust-consumer".

    A `pid` of 0 is treated as "no process" and yields a count of 0.
    """
    if pid == 0:
        return 0

    shm_prefix = "/dev/shm/shm-ust-consumer"
    fd_directory = os.path.join("/proc", str(pid), "fd")

    matches = 0
    for parent, _subdirs, entries in os.walk(fd_directory):
        for entry in entries:
            fd_path = pathlib.Path(os.path.join(parent, entry))
            try:
                if not fd_path.is_symlink():
                    continue
                target = str(fd_path.resolve())
            except FileNotFoundError:
                # The descriptor table can change while it is being walked;
                # an entry that vanished is simply not counted.
                continue
            if target.startswith(shm_prefix):
                matches += 1
    return matches
66
67
def count_dev_shm_fds(tap, test_env):
    """
    Return a `(fds_consumerd32, fds_consumerd64)` tuple holding, for each UST
    consumer daemon looked up as a child of the session daemon of `test_env`,
    the number of its file descriptors counted by
    `count_process_dev_shm_fds()`.

    A consumer daemon that cannot be found contributes a count of 0.
    """
    return tuple(
        count_process_dev_shm_fds(
            get_consumerd_pid(tap, test_env._sessiond.pid, consumerd_name)
        )
        for consumerd_name in ("ustconsumerd32", "ustconsumerd64")
    )
74
75
def test_fd_leak(tap, test_env, buffer_sharing_policy, kill_relayd=True):
    """
    Check that the consumer daemons hold the same number of /dev/shm file
    descriptors before and after a live session's lifetime.

    A live session streaming to the relay daemon of `test_env` is created
    with one user space channel using `buffer_sharing_policy`. When
    `kill_relayd` is true, the relay daemon is terminated right after the
    session is started. The test application is then run three times, and
    the consumer daemons' FD counts are sampled after each run and again
    once the session is destroyed.

    Two TAP results are emitted:
      1. the counts after session start and after session destroy are equal;
      2. the counts after each application run are equal (per-UID buffers
         only; skipped otherwise).
    """
    tap.diagnostic(
        "test_fd_leak with buffer sharing policy {}, kill relayd: {}".format(
            buffer_sharing_policy, kill_relayd
        )
    )

    # Set up a live session that streams to the local relay daemon.
    client = lttngtest.LTTngClient(test_env, log=tap.diagnostic)
    relayd_url = "net://localhost:{}:{}/".format(
        test_env.lttng_relayd_control_port, test_env.lttng_relayd_data_port
    )
    session = client.create_session(
        output=lttngtest.NetworkSessionOutputLocation(relayd_url), live=True
    )
    user_channel = session.add_channel(
        lttngtest.lttngctl.TracingDomain.User,
        buffer_sharing_policy=buffer_sharing_policy,
    )
    user_channel.add_recording_rule(lttngtest.lttngctl.UserTracepointEventRule())
    session.start()

    count_post_start = count_dev_shm_fds(tap, test_env)

    # Kill the relayd
    if kill_relayd:
        test_env._terminate_relayd()

    # Run the test application three times, sampling the FD counts after
    # each run.
    app_counts = []
    for _ in range(3):
        test_env.launch_wait_trace_test_application(10)
        app_counts.append(count_dev_shm_fds(tap, test_env))

    session.stop()
    session.destroy()

    count_post_destroy = count_dev_shm_fds(tap, test_env)

    tap.diagnostic(
        "FD counts post-start: {}, post-destroy: {}".format(
            count_post_start, count_post_destroy
        )
    )
    tap.test(
        count_post_start == count_post_destroy,
        "Count of consumerd FDs in /dev/shm are equal after session start then after destroy",
    )

    tap.diagnostic(
        "FD counts post-app-1: {}, post-app-2: {}, post-app-3: {}".format(*app_counts)
    )
    per_uid = lttngtest.lttngctl.BufferSharingPolicy.PerUID
    if buffer_sharing_policy == per_uid:
        first, second, third = app_counts
        tap.test(
            first == second == third,
            "Count of consumerd FDs in /dev/shm doesn't leak over several application invocations",
        )
    else:
        tap.skip(
            "Count of consumerds FDs in /dev/shm doesn't leak over several application invocations - no mechanism is available to guarantee buffer reclamation within a given time frame"
        )
142
143
# Four scenarios (relayd killed or not, times per-UID or per-PID buffers),
# each reporting two TAP results, hence a plan of 8.
tap = lttngtest.TapGenerator(8)
policies = (
    lttngtest.lttngctl.BufferSharingPolicy.PerUID,
    lttngtest.lttngctl.BufferSharingPolicy.PerPID,
)
for kill_relayd in (True, False):
    for buffer_sharing_policy in policies:
        # A new test environment is set up for every scenario.
        environment = lttngtest.test_environment(
            log=tap.diagnostic, with_relayd=True, with_sessiond=True
        )
        with environment as test_env:
            test_fd_leak(tap, test_env, buffer_sharing_policy, kill_relayd)

exit_status = 0 if tap.is_successful else 1
sys.exit(exit_status)
This page took 0.029196 seconds and 4 git commands to generate.