2e532bde4eec832b13895372ddb2b314b7fae180
[lttng-tools.git] / src / bin / lttng-relayd / health-relayd.cpp
1 /*
2 * Copyright (C) 2013 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
3 *
4 * SPDX-License-Identifier: GPL-2.0-only
5 *
6 */
7
8 #define _LGPL_SOURCE
9 #include <fcntl.h>
10 #include <getopt.h>
11 #include <grp.h>
12 #include <limits.h>
13 #include <pthread.h>
14 #include <signal.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <sys/ipc.h>
19 #include <sys/resource.h>
20 #include <sys/shm.h>
21 #include <sys/socket.h>
22 #include <sys/stat.h>
23 #include <sys/types.h>
24 #include <urcu/list.h>
25 #include <poll.h>
26 #include <unistd.h>
27 #include <sys/mman.h>
28 #include <urcu/compiler.h>
29 #include <inttypes.h>
30
31 #include <common/defaults.h>
32 #include <common/common.h>
33 #include <common/consumer/consumer.h>
34 #include <common/consumer/consumer-timer.h>
35 #include <common/compat/poll.h>
36 #include <common/sessiond-comm/sessiond-comm.h>
37 #include <common/utils.h>
38 #include <common/compat/getenv.h>
39 #include <common/fd-tracker/utils.h>
40
41 #include "lttng-relayd.h"
42 #include "health-relayd.h"
43
/*
 * Path of the health check Unix socket. Starts out empty; it is filled in
 * either from the environment or with a default path during setup.
 */
static char health_unix_sock_path[PATH_MAX];

/*
 * Pipe used to tell the health thread to quit. Element 0 is the descriptor
 * polled by the health thread; both entries are -1 until set elsewhere.
 */
int health_quit_pipe[2] = { -1, -1 };
49
50 /*
51 * Check if the thread quit pipe was triggered.
52 *
53 * Return 1 if it was triggered else 0;
54 */
55 static
56 int check_health_quit_pipe(int fd, uint32_t events)
57 {
58 if (fd == health_quit_pipe[0] && (events & LPOLLIN)) {
59 return 1;
60 }
61
62 return 0;
63 }
64
65 /*
66 * Send data on a unix socket using the liblttsessiondcomm API.
67 *
68 * Return lttcomm error code.
69 */
70 static int send_unix_sock(int sock, void *buf, size_t len)
71 {
72 /* Check valid length */
73 if (len == 0) {
74 return -1;
75 }
76
77 return lttcomm_send_unix_sock(sock, buf, len);
78 }
79
80 static int create_lttng_rundir_with_perm(const char *rundir)
81 {
82 int ret;
83
84 DBG3("Creating LTTng run directory: %s", rundir);
85
86 ret = mkdir(rundir, S_IRWXU);
87 if (ret < 0) {
88 if (errno != EEXIST) {
89 ERR("Unable to create %s", rundir);
90 goto error;
91 } else {
92 ret = 0;
93 }
94 } else if (ret == 0) {
95 int is_root = !getuid();
96
97 if (is_root) {
98 gid_t gid;
99
100 ret = utils_get_group_id(tracing_group_name, true, &gid);
101 if (ret) {
102 /* Default to root group. */
103 gid = 0;
104 }
105
106 ret = chown(rundir, 0, gid);
107 if (ret < 0) {
108 ERR("Unable to set group on %s", rundir);
109 PERROR("chown");
110 ret = -1;
111 goto error;
112 }
113
114 ret = chmod(rundir,
115 S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
116 if (ret < 0) {
117 ERR("Unable to set permissions on %s", health_unix_sock_path);
118 PERROR("chmod");
119 ret = -1;
120 goto error;
121 }
122 }
123 }
124
125 error:
126 return ret;
127 }
128
129 static
130 int parse_health_env(void)
131 {
132 const char *health_path;
133
134 health_path = lttng_secure_getenv(LTTNG_RELAYD_HEALTH_ENV);
135 if (health_path) {
136 strncpy(health_unix_sock_path, health_path,
137 PATH_MAX);
138 health_unix_sock_path[PATH_MAX - 1] = '\0';
139 }
140
141 return 0;
142 }
143
144 static
145 int setup_health_path(void)
146 {
147 int is_root, ret = 0;
148 const char *home_path = NULL;
149 char *rundir = NULL, *relayd_path = NULL;
150
151 ret = parse_health_env();
152 if (ret) {
153 return ret;
154 }
155
156 is_root = !getuid();
157
158 if (is_root) {
159 rundir = strdup(DEFAULT_LTTNG_RUNDIR);
160 if (!rundir) {
161 ret = -ENOMEM;
162 goto end;
163 }
164 } else {
165 /*
166 * Create rundir from home path. This will create something like
167 * $HOME/.lttng
168 */
169 home_path = utils_get_home_dir();
170
171 if (home_path == NULL) {
172 /* TODO: Add --socket PATH option */
173 ERR("Can't get HOME directory for sockets creation.");
174 ret = -EPERM;
175 goto end;
176 }
177
178 ret = asprintf(&rundir, DEFAULT_LTTNG_HOME_RUNDIR, home_path);
179 if (ret < 0) {
180 ret = -ENOMEM;
181 goto end;
182 }
183 }
184
185 ret = asprintf(&relayd_path, DEFAULT_RELAYD_PATH, rundir);
186 if (ret < 0) {
187 ret = -ENOMEM;
188 goto end;
189 }
190
191 ret = create_lttng_rundir_with_perm(rundir);
192 if (ret < 0) {
193 goto end;
194 }
195
196 ret = create_lttng_rundir_with_perm(relayd_path);
197 if (ret < 0) {
198 goto end;
199 }
200
201 if (is_root) {
202 if (strlen(health_unix_sock_path) != 0) {
203 goto end;
204 }
205 snprintf(health_unix_sock_path, sizeof(health_unix_sock_path),
206 DEFAULT_GLOBAL_RELAY_HEALTH_UNIX_SOCK,
207 (int) getpid());
208 } else {
209 /* Set health check Unix path */
210 if (strlen(health_unix_sock_path) != 0) {
211 goto end;
212 }
213
214 snprintf(health_unix_sock_path, sizeof(health_unix_sock_path),
215 DEFAULT_HOME_RELAY_HEALTH_UNIX_SOCK,
216 home_path, (int) getpid());
217 }
218
219 end:
220 free(rundir);
221 free(relayd_path);
222 return ret;
223 }
224
225 static
226 int accept_unix_socket(void *data, int *out_fd)
227 {
228 int ret;
229 int accepting_sock = *((int *) data);
230
231 ret = lttcomm_accept_unix_sock(accepting_sock);
232 if (ret < 0) {
233 goto end;
234 }
235
236 *out_fd = ret;
237 ret = 0;
238 end:
239 return ret;
240 }
241
242 static
243 int open_unix_socket(void *data, int *out_fd)
244 {
245 int ret;
246 const char *path = (const char *) data;
247
248 ret = lttcomm_create_unix_sock(path);
249 if (ret < 0) {
250 goto end;
251 }
252
253 *out_fd = ret;
254 ret = 0;
255 end:
256 return ret;
257 }
258
259 /*
260 * Thread managing health check socket.
261 */
262 void *thread_manage_health(void *data)
263 {
264 int sock = -1, new_sock = -1, ret, i, pollfd, err = -1;
265 uint32_t revents, nb_fd;
266 struct lttng_poll_event events;
267 struct health_comm_msg msg;
268 struct health_comm_reply reply;
269 int is_root;
270 char *sock_name;
271
272 DBG("[thread] Manage health check started");
273
274 setup_health_path();
275
276 rcu_register_thread();
277
278 /* We might hit an error path before this is created. */
279 lttng_poll_init(&events);
280
281 /* Create unix socket */
282 ret = asprintf(&sock_name, "Unix socket @ %s", health_unix_sock_path);
283 if (ret == -1) {
284 PERROR("Failed to allocate unix socket name");
285 err = -1;
286 goto error;
287 }
288 ret = fd_tracker_open_unsuspendable_fd(the_fd_tracker, &sock,
289 (const char **) &sock_name, 1, open_unix_socket,
290 health_unix_sock_path);
291 free(sock_name);
292 if (ret < 0) {
293 ERR("Unable to create health check Unix socket");
294 err = -1;
295 goto error;
296 }
297
298 is_root = !getuid();
299 if (is_root) {
300 /* lttng health client socket path permissions */
301 gid_t gid;
302
303 ret = utils_get_group_id(tracing_group_name, true, &gid);
304 if (ret) {
305 /* Default to root group. */
306 gid = 0;
307 }
308
309 ret = chown(health_unix_sock_path, 0, gid);
310 if (ret < 0) {
311 ERR("Unable to set group on %s", health_unix_sock_path);
312 PERROR("chown");
313 err = -1;
314 goto error;
315 }
316
317 ret = chmod(health_unix_sock_path,
318 S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
319 if (ret < 0) {
320 ERR("Unable to set permissions on %s", health_unix_sock_path);
321 PERROR("chmod");
322 err = -1;
323 goto error;
324 }
325 }
326
327 /*
328 * Set the CLOEXEC flag. Return code is useless because either way, the
329 * show must go on.
330 */
331 (void) utils_set_fd_cloexec(sock);
332
333 ret = lttcomm_listen_unix_sock(sock);
334 if (ret < 0) {
335 goto error;
336 }
337
338 /* Size is set to 2 for the unix socket and quit pipe. */
339 ret = fd_tracker_util_poll_create(the_fd_tracker,
340 "Health management thread epoll", &events, 2,
341 LTTNG_CLOEXEC);
342 if (ret < 0) {
343 ERR("Poll set creation failed");
344 goto error;
345 }
346
347 ret = lttng_poll_add(&events, health_quit_pipe[0], LPOLLIN);
348 if (ret < 0) {
349 goto error;
350 }
351
352 /* Add the application registration socket */
353 ret = lttng_poll_add(&events, sock, LPOLLIN | LPOLLPRI);
354 if (ret < 0) {
355 goto error;
356 }
357
358 lttng_relay_notify_ready();
359
360 while (1) {
361 char *accepted_socket_name;
362
363 DBG("Health check ready");
364
365 /* Inifinite blocking call, waiting for transmission */
366 restart:
367 ret = lttng_poll_wait(&events, -1);
368 if (ret < 0) {
369 /*
370 * Restart interrupted system call.
371 */
372 if (errno == EINTR) {
373 goto restart;
374 }
375 goto error;
376 }
377
378 nb_fd = ret;
379
380 for (i = 0; i < nb_fd; i++) {
381 /* Fetch once the poll data */
382 revents = LTTNG_POLL_GETEV(&events, i);
383 pollfd = LTTNG_POLL_GETFD(&events, i);
384
385 /* Thread quit pipe has been closed. Killing thread. */
386 ret = check_health_quit_pipe(pollfd, revents);
387 if (ret) {
388 err = 0;
389 goto exit;
390 }
391
392 /* Event on the registration socket */
393 if (pollfd == sock) {
394 if (revents & LPOLLIN) {
395 continue;
396 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
397 ERR("Health socket poll error");
398 goto error;
399 } else {
400 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
401 goto error;
402 }
403 }
404 }
405
406 ret = asprintf(&accepted_socket_name, "Socket accepted from unix socket @ %s",
407 health_unix_sock_path);
408 if (ret == -1) {
409 PERROR("Failed to allocate name of accepted socket from unix socket @ %s",
410 health_unix_sock_path);
411 goto error;
412 }
413 ret = fd_tracker_open_unsuspendable_fd(the_fd_tracker, &new_sock,
414 (const char **) &accepted_socket_name, 1,
415 accept_unix_socket, &sock);
416 free(accepted_socket_name);
417 if (ret < 0) {
418 goto error;
419 }
420
421 /*
422 * Set the CLOEXEC flag. Return code is useless because either way, the
423 * show must go on.
424 */
425 (void) utils_set_fd_cloexec(new_sock);
426
427 DBG("Receiving data from client for health...");
428 ret = lttcomm_recv_unix_sock(new_sock, (void *)&msg, sizeof(msg));
429 if (ret <= 0) {
430 DBG("Nothing recv() from client... continuing");
431 ret = fd_tracker_close_unsuspendable_fd(the_fd_tracker,
432 &new_sock, 1, fd_tracker_util_close_fd,
433 NULL);
434 if (ret) {
435 PERROR("close");
436 }
437 new_sock = -1;
438 continue;
439 }
440
441 rcu_thread_online();
442
443 LTTNG_ASSERT(msg.cmd == HEALTH_CMD_CHECK);
444
445 memset(&reply, 0, sizeof(reply));
446 for (i = 0; i < NR_HEALTH_RELAYD_TYPES; i++) {
447 /*
448 * health_check_state return 0 if thread is in
449 * error.
450 */
451 if (!health_check_state(health_relayd, i)) {
452 reply.ret_code |= 1ULL << i;
453 }
454 }
455
456 DBG2("Health check return value %" PRIx64, reply.ret_code);
457
458 ret = send_unix_sock(new_sock, (void *) &reply, sizeof(reply));
459 if (ret < 0) {
460 ERR("Failed to send health data back to client");
461 }
462
463 /* End of transmission */
464 ret = fd_tracker_close_unsuspendable_fd(the_fd_tracker,
465 &new_sock, 1, fd_tracker_util_close_fd,
466 NULL);
467 if (ret) {
468 PERROR("close");
469 }
470 new_sock = -1;
471 }
472
473 error:
474 lttng_relay_stop_threads();
475 exit:
476 if (err) {
477 ERR("Health error occurred in %s", __func__);
478 }
479 DBG("Health check thread dying");
480 unlink(health_unix_sock_path);
481 if (sock >= 0) {
482 ret = fd_tracker_close_unsuspendable_fd(the_fd_tracker, &sock,
483 1, fd_tracker_util_close_fd, NULL);
484 if (ret) {
485 PERROR("close");
486 }
487 }
488
489 /*
490 * We do NOT rmdir rundir nor the relayd path because there are
491 * other processes using them.
492 */
493
494 (void) fd_tracker_util_poll_clean(the_fd_tracker, &events);
495
496 rcu_unregister_thread();
497 return NULL;
498 }
This page took 0.037722 seconds and 3 git commands to generate.