Fix: relay_recv_metadata does not check for partial write
[lttng-tools.git] / src / bin / lttng-relayd / health-relayd.c
CommitLineData
65931c8b
MD
1/*
2 * Copyright (C) 2013 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License, version 2 only,
6 * as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
16 */
17
6c1c0768 18#define _LGPL_SOURCE
65931c8b
MD
19#include <fcntl.h>
20#include <getopt.h>
21#include <grp.h>
22#include <limits.h>
23#include <pthread.h>
24#include <signal.h>
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <sys/ipc.h>
29#include <sys/resource.h>
30#include <sys/shm.h>
31#include <sys/socket.h>
32#include <sys/stat.h>
33#include <sys/types.h>
34#include <urcu/list.h>
35#include <poll.h>
36#include <unistd.h>
37#include <sys/mman.h>
38#include <assert.h>
65931c8b 39#include <urcu/compiler.h>
65931c8b
MD
40#include <inttypes.h>
41
42#include <common/defaults.h>
43#include <common/common.h>
c8fea79c
JR
44#include <common/consumer/consumer.h>
45#include <common/consumer/consumer-timer.h>
65931c8b
MD
46#include <common/compat/poll.h>
47#include <common/sessiond-comm/sessiond-comm.h>
48#include <common/utils.h>
e8fa9fb0 49#include <common/compat/getenv.h>
65931c8b
MD
50
51#include "lttng-relayd.h"
52#include "health-relayd.h"
53
/*
 * Global health check unix path.
 *
 * Filled in either by parse_health_env() from the environment or by
 * setup_health_path() from the built-in defaults; an empty string means
 * that no path has been chosen yet.
 */
static
char health_unix_sock_path[PATH_MAX];

/*
 * Quit pipe of the health thread. thread_manage_health() polls
 * health_quit_pipe[0] for LPOLLIN and terminates when it becomes readable.
 */
int health_quit_pipe[2];
59
60/*
61 * Check if the thread quit pipe was triggered.
62 *
63 * Return 1 if it was triggered else 0;
64 */
65static
66int check_health_quit_pipe(int fd, uint32_t events)
67{
68 if (fd == health_quit_pipe[0] && (events & LPOLLIN)) {
69 return 1;
70 }
71
72 return 0;
73}
74
/*
 * Send len bytes starting at buf on the unix socket sock by delegating to
 * lttcomm_send_unix_sock().
 *
 * A zero length is rejected with -1 without touching the socket. Otherwise
 * the value returned by lttcomm_send_unix_sock() is passed through.
 */
static int send_unix_sock(int sock, void *buf, size_t len)
{
	if (len > 0) {
		return lttcomm_send_unix_sock(sock, buf, len);
	}

	/* An empty payload is considered invalid. */
	return -1;
}
89
90static int create_lttng_rundir_with_perm(const char *rundir)
91{
92 int ret;
93
94 DBG3("Creating LTTng run directory: %s", rundir);
95
96 ret = mkdir(rundir, S_IRWXU);
97 if (ret < 0) {
98 if (errno != EEXIST) {
99 ERR("Unable to create %s", rundir);
100 goto error;
101 } else {
102 ret = 0;
103 }
104 } else if (ret == 0) {
105 int is_root = !getuid();
106
107 if (is_root) {
108 ret = chown(rundir, 0,
109 utils_get_group_id(tracing_group_name));
110 if (ret < 0) {
111 ERR("Unable to set group on %s", rundir);
112 PERROR("chown");
113 ret = -1;
114 goto error;
115 }
116
117 ret = chmod(rundir,
118 S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
119 if (ret < 0) {
120 ERR("Unable to set permissions on %s", health_unix_sock_path);
121 PERROR("chmod");
122 ret = -1;
123 goto error;
124 }
125 }
126 }
127
128error:
129 return ret;
130}
131
094fe907
MD
132static
133int parse_health_env(void)
134{
135 const char *health_path;
136
e8fa9fb0 137 health_path = lttng_secure_getenv(LTTNG_RELAYD_HEALTH_ENV);
094fe907
MD
138 if (health_path) {
139 strncpy(health_unix_sock_path, health_path,
140 PATH_MAX);
141 health_unix_sock_path[PATH_MAX - 1] = '\0';
142 }
143
144 return 0;
145}
146
65931c8b
MD
147static
148int setup_health_path(void)
149{
150 int is_root, ret = 0;
edd94901 151 char *home_path = NULL, *rundir = NULL, *relayd_path = NULL;
65931c8b 152
094fe907
MD
153 ret = parse_health_env();
154 if (ret) {
155 return ret;
156 }
157
65931c8b
MD
158 is_root = !getuid();
159
160 if (is_root) {
c3844e39 161 rundir = strdup(DEFAULT_LTTNG_RUNDIR);
b6ab01aa
MD
162 if (!rundir) {
163 ret = -ENOMEM;
164 goto end;
165 }
65931c8b
MD
166 } else {
167 /*
168 * Create rundir from home path. This will create something like
169 * $HOME/.lttng
170 */
171 home_path = utils_get_home_dir();
172
173 if (home_path == NULL) {
174 /* TODO: Add --socket PATH option */
175 ERR("Can't get HOME directory for sockets creation.");
176 ret = -EPERM;
177 goto end;
178 }
179
c3844e39 180 ret = asprintf(&rundir, DEFAULT_LTTNG_HOME_RUNDIR, home_path);
65931c8b
MD
181 if (ret < 0) {
182 ret = -ENOMEM;
183 goto end;
184 }
185 }
186
c3844e39 187 ret = asprintf(&relayd_path, DEFAULT_RELAYD_PATH, rundir);
65931c8b
MD
188 if (ret < 0) {
189 ret = -ENOMEM;
190 goto end;
191 }
192
c3844e39 193 ret = create_lttng_rundir_with_perm(rundir);
65931c8b
MD
194 if (ret < 0) {
195 goto end;
196 }
197
198 ret = create_lttng_rundir_with_perm(relayd_path);
199 if (ret < 0) {
200 goto end;
201 }
202
203 if (is_root) {
204 if (strlen(health_unix_sock_path) != 0) {
205 goto end;
206 }
207 snprintf(health_unix_sock_path, sizeof(health_unix_sock_path),
208 DEFAULT_GLOBAL_RELAY_HEALTH_UNIX_SOCK,
d1f721c5 209 (int) getpid());
65931c8b
MD
210 } else {
211 /* Set health check Unix path */
212 if (strlen(health_unix_sock_path) != 0) {
213 goto end;
214 }
215
216 snprintf(health_unix_sock_path, sizeof(health_unix_sock_path),
217 DEFAULT_HOME_RELAY_HEALTH_UNIX_SOCK,
d1f721c5 218 home_path, (int) getpid());
65931c8b
MD
219 }
220
221end:
c3844e39 222 free(rundir);
edd94901 223 free(relayd_path);
65931c8b
MD
224 return ret;
225}
226
227/*
228 * Thread managing health check socket.
229 */
230void *thread_manage_health(void *data)
231{
232 int sock = -1, new_sock = -1, ret, i, pollfd, err = -1;
233 uint32_t revents, nb_fd;
234 struct lttng_poll_event events;
235 struct health_comm_msg msg;
236 struct health_comm_reply reply;
237 int is_root;
238
239 DBG("[thread] Manage health check started");
240
241 setup_health_path();
242
243 rcu_register_thread();
244
245 /* We might hit an error path before this is created. */
246 lttng_poll_init(&events);
247
248 /* Create unix socket */
249 sock = lttcomm_create_unix_sock(health_unix_sock_path);
250 if (sock < 0) {
251 ERR("Unable to create health check Unix socket");
7568ddbf 252 err = -1;
65931c8b
MD
253 goto error;
254 }
255
256 is_root = !getuid();
257 if (is_root) {
258 /* lttng health client socket path permissions */
259 ret = chown(health_unix_sock_path, 0,
260 utils_get_group_id(tracing_group_name));
261 if (ret < 0) {
262 ERR("Unable to set group on %s", health_unix_sock_path);
263 PERROR("chown");
7568ddbf 264 err = -1;
65931c8b
MD
265 goto error;
266 }
267
268 ret = chmod(health_unix_sock_path,
269 S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
270 if (ret < 0) {
271 ERR("Unable to set permissions on %s", health_unix_sock_path);
272 PERROR("chmod");
7568ddbf 273 err = -1;
65931c8b
MD
274 goto error;
275 }
276 }
277
278 /*
279 * Set the CLOEXEC flag. Return code is useless because either way, the
280 * show must go on.
281 */
282 (void) utils_set_fd_cloexec(sock);
283
284 ret = lttcomm_listen_unix_sock(sock);
285 if (ret < 0) {
286 goto error;
287 }
288
289 /* Size is set to 1 for the consumer_channel pipe */
290 ret = lttng_poll_create(&events, 2, LTTNG_CLOEXEC);
291 if (ret < 0) {
292 ERR("Poll set creation failed");
293 goto error;
294 }
295
296 ret = lttng_poll_add(&events, health_quit_pipe[0], LPOLLIN);
297 if (ret < 0) {
298 goto error;
299 }
300
301 /* Add the application registration socket */
302 ret = lttng_poll_add(&events, sock, LPOLLIN | LPOLLPRI);
303 if (ret < 0) {
304 goto error;
305 }
306
3fd27398
MD
307 lttng_relay_notify_ready();
308
65931c8b
MD
309 while (1) {
310 DBG("Health check ready");
311
312 /* Inifinite blocking call, waiting for transmission */
313restart:
314 ret = lttng_poll_wait(&events, -1);
315 if (ret < 0) {
316 /*
317 * Restart interrupted system call.
318 */
319 if (errno == EINTR) {
320 goto restart;
321 }
322 goto error;
323 }
324
325 nb_fd = ret;
326
327 for (i = 0; i < nb_fd; i++) {
328 /* Fetch once the poll data */
329 revents = LTTNG_POLL_GETEV(&events, i);
330 pollfd = LTTNG_POLL_GETFD(&events, i);
331
fd20dac9
MD
332 if (!revents) {
333 /* No activity for this FD (poll implementation). */
334 continue;
335 }
336
65931c8b
MD
337 /* Thread quit pipe has been closed. Killing thread. */
338 ret = check_health_quit_pipe(pollfd, revents);
339 if (ret) {
340 err = 0;
341 goto exit;
342 }
343
344 /* Event on the registration socket */
345 if (pollfd == sock) {
03e43155
MD
346 if (revents & LPOLLIN) {
347 continue;
348 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
65931c8b
MD
349 ERR("Health socket poll error");
350 goto error;
03e43155
MD
351 } else {
352 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
353 goto error;
65931c8b
MD
354 }
355 }
356 }
357
358 new_sock = lttcomm_accept_unix_sock(sock);
359 if (new_sock < 0) {
360 goto error;
361 }
362
363 /*
364 * Set the CLOEXEC flag. Return code is useless because either way, the
365 * show must go on.
366 */
367 (void) utils_set_fd_cloexec(new_sock);
368
369 DBG("Receiving data from client for health...");
370 ret = lttcomm_recv_unix_sock(new_sock, (void *)&msg, sizeof(msg));
371 if (ret <= 0) {
372 DBG("Nothing recv() from client... continuing");
373 ret = close(new_sock);
374 if (ret) {
375 PERROR("close");
376 }
377 new_sock = -1;
378 continue;
379 }
380
381 rcu_thread_online();
382
383 assert(msg.cmd == HEALTH_CMD_CHECK);
384
53efb85a 385 memset(&reply, 0, sizeof(reply));
65931c8b
MD
386 for (i = 0; i < NR_HEALTH_RELAYD_TYPES; i++) {
387 /*
388 * health_check_state return 0 if thread is in
389 * error.
390 */
391 if (!health_check_state(health_relayd, i)) {
392 reply.ret_code |= 1ULL << i;
393 }
394 }
395
396 DBG2("Health check return value %" PRIx64, reply.ret_code);
397
398 ret = send_unix_sock(new_sock, (void *) &reply, sizeof(reply));
399 if (ret < 0) {
400 ERR("Failed to send health data back to client");
401 }
402
403 /* End of transmission */
404 ret = close(new_sock);
405 if (ret) {
406 PERROR("close");
407 }
408 new_sock = -1;
409 }
410
65931c8b 411error:
81714439
JG
412 lttng_relay_stop_threads();
413exit:
65931c8b
MD
414 if (err) {
415 ERR("Health error occurred in %s", __func__);
416 }
417 DBG("Health check thread dying");
418 unlink(health_unix_sock_path);
419 if (sock >= 0) {
420 ret = close(sock);
421 if (ret) {
422 PERROR("close");
423 }
424 }
425
dcbcae3e
MD
426 /*
427 * We do NOT rmdir rundir nor the relayd path because there are
428 * other processes using them.
429 */
430
65931c8b
MD
431 lttng_poll_clean(&events);
432
433 rcu_unregister_thread();
434 return NULL;
435}
This page took 0.054169 seconds and 4 git commands to generate.