Commit | Line | Data |
---|---|---|
9a28bc04 KS |
1 | #!/usr/bin/env python3 |
2 | # | |
3 | # SPDX-FileCopyrightText: Kienan Stewart <kstewart@efficios.com> | |
4 | # SPDX-License-Identifier: GPL-2.0-only | |
5 | ||
6 | """ | |
7 | Test that the consumerd doesn't leak file descriptor allocations in /dev/shm | |
8 | when the relayd exits before instrumented applications start. | |
9 | ||
10 | @see https://bugs.lttng.org/issues/1411 | |
11 | """ | |
12 | ||
13 | import os | |
14 | import pathlib | |
15 | import subprocess | |
16 | import sys | |
17 | ||
18 | test_utils_import_path = pathlib.Path(__file__).absolute().parents[3] / "utils" | |
19 | sys.path.append(str(test_utils_import_path)) | |
20 | ||
21 | import lttngtest | |
22 | ||
23 | ||
def get_consumerd_pid(tap, parent, match_string):
    """
    Return the PID of the single child of `parent` reported by
    `pgrep -P <parent> -f <match_string>`.

    tap: object exposing `diagnostic(message)`; used to report failures.
    parent: PID of the parent process whose children are searched.
    match_string: pattern handed to `pgrep -f`.

    Returns the matching PID, or 0 when the lookup fails for any reason
    (pgrep could not be run, it printed no line, or it printed more than one
    line). Failures are reported through `tap.diagnostic`, never raised.
    """
    pid = 0
    try:
        # communicate() drains stdout while waiting for pgrep to exit, so the
        # child can never block on a full pipe (wait() followed by read() is
        # deadlock-prone). The context manager closes the pipe afterwards.
        with subprocess.Popen(
            ["pgrep", "-P", str(parent), "-f", match_string],
            stdout=subprocess.PIPE,
        ) as process:
            stdout, _ = process.communicate()

        output = str(stdout, encoding="UTF-8").splitlines()
        if len(output) != 1:
            raise Exception(
                "Unexpected number of output lines (got {}): {}".format(
                    len(output), output
                )
            )
        pid = int(output[0])
    except Exception as e:
        # Deliberately best-effort: callers treat a PID of 0 as "not found",
        # so any failure is only logged as a TAP diagnostic.
        tap.diagnostic(
            "Failed to find child process of '{}' matching '{}': '{}'".format(
                parent, match_string, str(e)
            )
        )
    return pid
47 | ||
48 | ||
def count_process_dev_shm_fds(pid):
    """
    Count the entries of /proc/<pid>/fd that are symlinks resolving to a path
    starting with "/dev/shm/shm-ust-consumer".

    pid: PID of the process to inspect; 0 means "no process" and yields 0.

    Returns the number of matching file descriptors.
    """
    if pid == 0:
        return 0

    matching = 0
    fd_dir = os.path.join("/proc", str(pid), "fd")
    for walk_root, _subdirs, entry_names in os.walk(fd_dir):
        for entry_name in entry_names:
            fd_link = pathlib.Path(os.path.join(walk_root, entry_name))
            try:
                if not fd_link.is_symlink():
                    continue
                target = str(fd_link.resolve())
            except FileNotFoundError:
                # The fd table is live: entries may appear or vanish while
                # /proc/<pid>/fd is being walked.
                continue
            if target.startswith("/dev/shm/shm-ust-consumer"):
                matching += 1
    return matching
66 | ||
67 | ||
def count_dev_shm_fds(tap, test_env):
    """
    Count the /dev/shm consumer file descriptors held by the 32-bit and
    64-bit UST consumer daemons spawned by the test environment's sessiond.

    tap: TAP generator, forwarded to `get_consumerd_pid` for diagnostics.
    test_env: test environment; its `_sessiond.pid` is used as the parent PID.

    Returns a `(fds_consumerd32, fds_consumerd64)` tuple. A consumerd that
    cannot be found contributes a count of 0.
    """
    fd_counts = []
    for consumerd_name in ("ustconsumerd32", "ustconsumerd64"):
        consumerd_pid = get_consumerd_pid(
            tap, test_env._sessiond.pid, consumerd_name
        )
        fd_counts.append(count_process_dev_shm_fds(consumerd_pid))
    return tuple(fd_counts)
74 | ||
75 | ||
def test_fd_leak(tap, test_env, buffer_sharing_policy, kill_relayd=True):
    """
    Check that the consumer daemons' /dev/shm file descriptor counts do not
    grow across a live network session.

    A live session with a user-space channel is started against the relayd
    ports of `test_env`. The relayd is optionally terminated, then the trace
    test application is launched three times. FD counts are sampled after the
    session start, after each application run and after the session is
    destroyed.

    tap: TAP generator receiving the diagnostics and the two results.
    test_env: test environment providing the sessiond/relayd and the apps.
    buffer_sharing_policy: buffer sharing policy applied to the channel.
    kill_relayd: when true, terminate the relayd before launching the apps.

    Two TAP results are reported: the post-start vs post-destroy comparison,
    and the per-application comparison (skipped unless the policy is PerUID).
    """
    tap.diagnostic(
        "test_fd_leak with buffer sharing policy {}, kill relayd: {}".format(
            buffer_sharing_policy, kill_relayd
        )
    )

    client = lttngtest.LTTngClient(test_env, log=tap.diagnostic)
    relayd_url = "net://localhost:{}:{}/".format(
        test_env.lttng_relayd_control_port, test_env.lttng_relayd_data_port
    )
    session = client.create_session(
        output=lttngtest.NetworkSessionOutputLocation(relayd_url), live=True
    )
    user_channel = session.add_channel(
        lttngtest.lttngctl.TracingDomain.User,
        buffer_sharing_policy=buffer_sharing_policy,
    )
    user_channel.add_recording_rule(lttngtest.lttngctl.UserTracepointEventRule())
    session.start()

    fds_after_start = count_dev_shm_fds(tap, test_env)

    # Take the relayd away before any instrumented application starts.
    if kill_relayd:
        test_env._terminate_relayd()

    # Sample the FD counts after each of three application runs.
    fds_after_apps = []
    for _ in range(3):
        test_env.launch_wait_trace_test_application(10)
        fds_after_apps.append(count_dev_shm_fds(tap, test_env))

    session.stop()
    session.destroy()

    fds_after_destroy = count_dev_shm_fds(tap, test_env)

    tap.diagnostic(
        "FD counts post-start: {}, post-destroy: {}".format(
            fds_after_start, fds_after_destroy
        )
    )
    tap.test(
        fds_after_start == fds_after_destroy,
        "Count of consumerd FDs in /dev/shm are equal after session start then after destroy",
    )

    tap.diagnostic(
        "FD counts post-app-1: {}, post-app-2: {}, post-app-3: {}".format(
            *fds_after_apps
        )
    )
    first_run, second_run, third_run = fds_after_apps
    if buffer_sharing_policy == lttngtest.lttngctl.BufferSharingPolicy.PerUID:
        tap.test(
            first_run == second_run == third_run,
            "Count of consumerd FDs in /dev/shm doesn't leak over several application invocations",
        )
    else:
        # For other policies the check is skipped; the reason is given in
        # the skip message itself.
        tap.skip(
            "Count of consumerds FDs in /dev/shm doesn't leak over several application invocations - no mechanism is available to guarantee buffer reclamation within a given time frame"
        )
142 | ||
143 | ||
# Run test_fd_leak once per (kill relayd, buffer sharing policy) combination,
# each in a fresh environment with both a sessiond and a relayd. The TAP plan
# announces 8 results for these 4 runs (test_fd_leak makes two
# tap.test/tap.skip calls per run).
tap = lttngtest.TapGenerator(8)
sharing_policies = (
    lttngtest.lttngctl.BufferSharingPolicy.PerUID,
    lttngtest.lttngctl.BufferSharingPolicy.PerPID,
)
for relayd_killed in (True, False):
    for sharing_policy in sharing_policies:
        with lttngtest.test_environment(
            with_sessiond=True, with_relayd=True, log=tap.diagnostic
        ) as env:
            test_fd_leak(tap, env, sharing_policy, relayd_killed)

# Exit with a failure status if any TAP result failed.
exit_status = 0 if tap.is_successful else 1
sys.exit(exit_status)