docs: Add supported versions and fix-backport policy
[lttng-tools.git] / src / bin / lttng-relayd / health-relayd.cpp
1 /*
2 * Copyright (C) 2013 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
3 *
4 * SPDX-License-Identifier: GPL-2.0-only
5 *
6 */
7
8 #define _LGPL_SOURCE
9 #include "health-relayd.hpp"
10 #include "lttng-relayd.hpp"
11
12 #include <common/common.hpp>
13 #include <common/compat/getenv.hpp>
14 #include <common/compat/poll.hpp>
15 #include <common/consumer/consumer-timer.hpp>
16 #include <common/consumer/consumer.hpp>
17 #include <common/defaults.hpp>
18 #include <common/fd-tracker/utils.hpp>
19 #include <common/sessiond-comm/sessiond-comm.hpp>
20 #include <common/utils.hpp>
21
22 #include <fcntl.h>
23 #include <getopt.h>
24 #include <grp.h>
25 #include <inttypes.h>
26 #include <limits.h>
27 #include <poll.h>
28 #include <pthread.h>
29 #include <signal.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <sys/ipc.h>
34 #include <sys/mman.h>
35 #include <sys/resource.h>
36 #include <sys/shm.h>
37 #include <sys/socket.h>
38 #include <sys/stat.h>
39 #include <sys/types.h>
40 #include <unistd.h>
41 #include <urcu/compiler.h>
42 #include <urcu/list.h>
43
/*
 * Global health check unix path.
 *
 * Seeded from the environment by parse_health_env() when the variable is
 * set; otherwise setup_health_path() formats a default path into it. The
 * health thread creates, chowns/chmods and finally unlinks the socket at
 * this path.
 */
static char health_unix_sock_path[PATH_MAX];

/*
 * Pipe polled by the health thread: any activity on the read end ([0])
 * makes the thread leave its loop and exit. Both ends start at -1; the code
 * shown in this file never opens them, so they are presumably created
 * elsewhere -- confirm against the callers.
 */
int health_quit_pipe[2] = { -1, -1 };
48
49 /*
50 * Send data on a unix socket using the liblttsessiondcomm API.
51 *
52 * Return lttcomm error code.
53 */
54 static int send_unix_sock(int sock, void *buf, size_t len)
55 {
56 /* Check valid length */
57 if (len == 0) {
58 return -1;
59 }
60
61 return lttcomm_send_unix_sock(sock, buf, len);
62 }
63
64 static int create_lttng_rundir_with_perm(const char *rundir)
65 {
66 int ret;
67
68 DBG3("Creating LTTng run directory: %s", rundir);
69
70 ret = mkdir(rundir, S_IRWXU);
71 if (ret < 0) {
72 if (errno != EEXIST) {
73 ERR("Unable to create %s", rundir);
74 goto error;
75 } else {
76 ret = 0;
77 }
78 } else if (ret == 0) {
79 int is_root = !getuid();
80
81 if (is_root) {
82 gid_t gid;
83
84 ret = utils_get_group_id(tracing_group_name, true, &gid);
85 if (ret) {
86 /* Default to root group. */
87 gid = 0;
88 }
89
90 ret = chown(rundir, 0, gid);
91 if (ret < 0) {
92 ERR("Unable to set group on %s", rundir);
93 PERROR("chown");
94 ret = -1;
95 goto error;
96 }
97
98 ret = chmod(rundir,
99 S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH |
100 S_IXOTH);
101 if (ret < 0) {
102 ERR("Unable to set permissions on %s", rundir);
103 PERROR("chmod");
104 ret = -1;
105 goto error;
106 }
107 }
108 }
109
110 error:
111 return ret;
112 }
113
114 static int parse_health_env()
115 {
116 const char *health_path;
117
118 health_path = lttng_secure_getenv(LTTNG_RELAYD_HEALTH_ENV);
119 if (health_path) {
120 strncpy(health_unix_sock_path, health_path, PATH_MAX);
121 health_unix_sock_path[PATH_MAX - 1] = '\0';
122 }
123
124 return 0;
125 }
126
127 static int setup_health_path()
128 {
129 int is_root, ret = 0;
130 const char *home_path = nullptr;
131 char *rundir = nullptr, *relayd_path = nullptr;
132
133 ret = parse_health_env();
134 if (ret) {
135 return ret;
136 }
137
138 is_root = !getuid();
139
140 if (is_root) {
141 rundir = strdup(DEFAULT_LTTNG_RUNDIR);
142 if (!rundir) {
143 ret = -ENOMEM;
144 goto end;
145 }
146 } else {
147 /*
148 * Create rundir from home path. This will create something like
149 * $HOME/.lttng
150 */
151 home_path = utils_get_home_dir();
152
153 if (home_path == nullptr) {
154 /* TODO: Add --socket PATH option */
155 ERR("Can't get HOME directory for sockets creation.");
156 ret = -EPERM;
157 goto end;
158 }
159
160 ret = asprintf(&rundir, DEFAULT_LTTNG_HOME_RUNDIR, home_path);
161 if (ret < 0) {
162 ret = -ENOMEM;
163 goto end;
164 }
165 }
166
167 ret = asprintf(&relayd_path, DEFAULT_RELAYD_PATH, rundir);
168 if (ret < 0) {
169 ret = -ENOMEM;
170 goto end;
171 }
172
173 ret = create_lttng_rundir_with_perm(rundir);
174 if (ret < 0) {
175 goto end;
176 }
177
178 ret = create_lttng_rundir_with_perm(relayd_path);
179 if (ret < 0) {
180 goto end;
181 }
182
183 if (is_root) {
184 if (strlen(health_unix_sock_path) != 0) {
185 goto end;
186 }
187 snprintf(health_unix_sock_path,
188 sizeof(health_unix_sock_path),
189 DEFAULT_GLOBAL_RELAY_HEALTH_UNIX_SOCK,
190 (int) getpid());
191 } else {
192 /* Set health check Unix path */
193 if (strlen(health_unix_sock_path) != 0) {
194 goto end;
195 }
196
197 snprintf(health_unix_sock_path,
198 sizeof(health_unix_sock_path),
199 DEFAULT_HOME_RELAY_HEALTH_UNIX_SOCK,
200 home_path,
201 (int) getpid());
202 }
203
204 end:
205 free(rundir);
206 free(relayd_path);
207 return ret;
208 }
209
210 static int accept_unix_socket(void *data, int *out_fd)
211 {
212 int ret;
213 int accepting_sock = *((int *) data);
214
215 ret = lttcomm_accept_unix_sock(accepting_sock);
216 if (ret < 0) {
217 goto end;
218 }
219
220 *out_fd = ret;
221 ret = 0;
222 end:
223 return ret;
224 }
225
226 static int open_unix_socket(void *data, int *out_fd)
227 {
228 int ret;
229 const char *path = (const char *) data;
230
231 ret = lttcomm_create_unix_sock(path);
232 if (ret < 0) {
233 goto end;
234 }
235
236 *out_fd = ret;
237 ret = 0;
238 end:
239 return ret;
240 }
241
242 /*
243 * Thread managing health check socket.
244 */
245 void *thread_manage_health_relayd(void *data __attribute__((unused)))
246 {
247 int sock = -1, new_sock = -1, ret, i, err = -1;
248 uint32_t nb_fd;
249 struct lttng_poll_event events;
250 struct health_comm_msg msg;
251 struct health_comm_reply reply;
252 int is_root;
253 char *sock_name;
254
255 DBG("[thread] Manage health check started");
256
257 setup_health_path();
258
259 rcu_register_thread();
260
261 /* We might hit an error path before this is created. */
262 lttng_poll_init(&events);
263
264 /* Create unix socket */
265 ret = asprintf(&sock_name, "Unix socket @ %s", health_unix_sock_path);
266 if (ret == -1) {
267 PERROR("Failed to allocate unix socket name");
268 err = -1;
269 goto error;
270 }
271 ret = fd_tracker_open_unsuspendable_fd(the_fd_tracker,
272 &sock,
273 (const char **) &sock_name,
274 1,
275 open_unix_socket,
276 health_unix_sock_path);
277 free(sock_name);
278 if (ret < 0) {
279 ERR("Unable to create health check Unix socket");
280 err = -1;
281 goto error;
282 }
283
284 is_root = !getuid();
285 if (is_root) {
286 /* lttng health client socket path permissions */
287 gid_t gid;
288
289 ret = utils_get_group_id(tracing_group_name, true, &gid);
290 if (ret) {
291 /* Default to root group. */
292 gid = 0;
293 }
294
295 ret = chown(health_unix_sock_path, 0, gid);
296 if (ret < 0) {
297 ERR("Unable to set group on %s", health_unix_sock_path);
298 PERROR("chown");
299 err = -1;
300 goto error;
301 }
302
303 ret = chmod(health_unix_sock_path, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
304 if (ret < 0) {
305 ERR("Unable to set permissions on %s", health_unix_sock_path);
306 PERROR("chmod");
307 err = -1;
308 goto error;
309 }
310 }
311
312 /*
313 * Set the CLOEXEC flag. Return code is useless because either way, the
314 * show must go on.
315 */
316 (void) utils_set_fd_cloexec(sock);
317
318 ret = lttcomm_listen_unix_sock(sock);
319 if (ret < 0) {
320 goto error;
321 }
322
323 /* Size is set to 2 for the unix socket and quit pipe. */
324 ret = fd_tracker_util_poll_create(
325 the_fd_tracker, "Health management thread epoll", &events, 2, LTTNG_CLOEXEC);
326 if (ret < 0) {
327 ERR("Poll set creation failed");
328 goto error;
329 }
330
331 ret = lttng_poll_add(&events, health_quit_pipe[0], LPOLLIN);
332 if (ret < 0) {
333 goto error;
334 }
335
336 /* Add the application registration socket */
337 ret = lttng_poll_add(&events, sock, LPOLLIN | LPOLLPRI);
338 if (ret < 0) {
339 goto error;
340 }
341
342 lttng_relay_notify_ready();
343
344 while (true) {
345 char *accepted_socket_name;
346
347 DBG("Health check ready");
348
349 /* Inifinite blocking call, waiting for transmission */
350 restart:
351 ret = lttng_poll_wait(&events, -1);
352 if (ret < 0) {
353 /*
354 * Restart interrupted system call.
355 */
356 if (errno == EINTR) {
357 goto restart;
358 }
359 goto error;
360 }
361
362 nb_fd = ret;
363
364 for (i = 0; i < nb_fd; i++) {
365 /* Fetch once the poll data */
366 const auto revents = LTTNG_POLL_GETEV(&events, i);
367 const auto pollfd = LTTNG_POLL_GETFD(&events, i);
368
369 /* Activity on thread quit pipe, exiting. */
370 if (pollfd == health_quit_pipe[0]) {
371 DBG("Activity on thread quit pipe");
372 err = 0;
373 goto exit;
374 }
375
376 /* Event on the registration socket */
377 if (pollfd == sock) {
378 if (revents & LPOLLIN) {
379 continue;
380 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
381 ERR("Health socket poll error");
382 goto error;
383 } else {
384 ERR("Unexpected poll events %u for sock %d",
385 revents,
386 pollfd);
387 goto error;
388 }
389 }
390 }
391
392 ret = asprintf(&accepted_socket_name,
393 "Socket accepted from unix socket @ %s",
394 health_unix_sock_path);
395 if (ret == -1) {
396 PERROR("Failed to allocate name of accepted socket from unix socket @ %s",
397 health_unix_sock_path);
398 goto error;
399 }
400 ret = fd_tracker_open_unsuspendable_fd(the_fd_tracker,
401 &new_sock,
402 (const char **) &accepted_socket_name,
403 1,
404 accept_unix_socket,
405 &sock);
406 free(accepted_socket_name);
407 if (ret < 0) {
408 goto error;
409 }
410
411 /*
412 * Set the CLOEXEC flag. Return code is useless because either way, the
413 * show must go on.
414 */
415 (void) utils_set_fd_cloexec(new_sock);
416
417 DBG("Receiving data from client for health...");
418 ret = lttcomm_recv_unix_sock(new_sock, (void *) &msg, sizeof(msg));
419 if (ret <= 0) {
420 DBG("Nothing recv() from client... continuing");
421 ret = fd_tracker_close_unsuspendable_fd(
422 the_fd_tracker, &new_sock, 1, fd_tracker_util_close_fd, nullptr);
423 if (ret) {
424 PERROR("close");
425 }
426 new_sock = -1;
427 continue;
428 }
429
430 rcu_thread_online();
431
432 LTTNG_ASSERT(msg.cmd == HEALTH_CMD_CHECK);
433
434 memset(&reply, 0, sizeof(reply));
435 for (i = 0; i < NR_HEALTH_RELAYD_TYPES; i++) {
436 /*
437 * health_check_state return 0 if thread is in
438 * error.
439 */
440 if (!health_check_state(health_relayd, i)) {
441 reply.ret_code |= 1ULL << i;
442 }
443 }
444
445 DBG2("Health check return value %" PRIx64, reply.ret_code);
446
447 ret = send_unix_sock(new_sock, (void *) &reply, sizeof(reply));
448 if (ret < 0) {
449 ERR("Failed to send health data back to client");
450 }
451
452 /* End of transmission */
453 ret = fd_tracker_close_unsuspendable_fd(
454 the_fd_tracker, &new_sock, 1, fd_tracker_util_close_fd, nullptr);
455 if (ret) {
456 PERROR("close");
457 }
458 new_sock = -1;
459 }
460
461 error:
462 lttng_relay_stop_threads();
463 exit:
464 if (err) {
465 ERR("Health error occurred in %s", __func__);
466 }
467 DBG("Health check thread dying");
468 unlink(health_unix_sock_path);
469 if (sock >= 0) {
470 ret = fd_tracker_close_unsuspendable_fd(
471 the_fd_tracker, &sock, 1, fd_tracker_util_close_fd, nullptr);
472 if (ret) {
473 PERROR("close");
474 }
475 }
476
477 /*
478 * We do NOT rmdir rundir nor the relayd path because there are
479 * other processes using them.
480 */
481
482 (void) fd_tracker_util_poll_clean(the_fd_tracker, &events);
483
484 rcu_unregister_thread();
485 return nullptr;
486 }
This page took 0.038092 seconds and 4 git commands to generate.