health check API
[lttng-tools.git] / src / bin / lttng-consumerd / health-consumerd.c
CommitLineData
5c635c72
MD
1/*
2 * Copyright (C) 2013 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License, version 2 only,
6 * as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
16 */
17
18#define _GNU_SOURCE
19#include <fcntl.h>
20#include <getopt.h>
21#include <grp.h>
22#include <limits.h>
23#include <pthread.h>
24#include <signal.h>
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <sys/ipc.h>
29#include <sys/resource.h>
30#include <sys/shm.h>
31#include <sys/socket.h>
32#include <sys/stat.h>
33#include <sys/types.h>
34#include <urcu/list.h>
35#include <poll.h>
36#include <unistd.h>
37#include <sys/mman.h>
38#include <assert.h>
39#include <config.h>
40#include <urcu/compiler.h>
41#include <ulimit.h>
42
43#include <common/defaults.h>
44#include <common/common.h>
45#include <common/consumer.h>
46#include <common/consumer-timer.h>
47#include <common/compat/poll.h>
48#include <common/sessiond-comm/sessiond-comm.h>
49#include <common/utils.h>
50
51#include "lttng-consumerd.h"
52#include "health-consumerd.h"
53
54/*
 * Path of the health check Unix socket. It starts out empty; an empty string
 * means the path has not been set up yet, and setup_health_path() leaves a
 * non-empty value untouched.
 */
55static char health_unix_sock_path[PATH_MAX];
56
/*
 * Quit pipe of the health thread: thread_manage_health() polls the read end
 * (index 0) and terminates once input is reported on it.
 */
57int health_quit_pipe[2];
58
59/*
60 * Check if the thread quit pipe was triggered.
61 *
62 * Return 1 if it was triggered else 0;
63 */
64static
65int check_health_quit_pipe(int fd, uint32_t events)
66{
67 if (fd == health_quit_pipe[0] && (events & LPOLLIN)) {
68 return 1;
69 }
70
71 return 0;
72}
73
/*
 * Send len bytes from buf on the unix socket sock through
 * lttcomm_send_unix_sock().
 *
 * Return -1 without sending anything when len is 0; otherwise return the
 * value given back by lttcomm_send_unix_sock().
 */
static int send_unix_sock(int sock, void *buf, size_t len)
{
	int status = -1;

	/* Only a non-empty payload is handed to the transport layer. */
	if (len != 0) {
		status = lttcomm_send_unix_sock(sock, buf, len);
	}

	return status;
}
88
89static
90int setup_health_path(void)
91{
92 int is_root, ret = 0;
93 enum lttng_consumer_type type;
94 const char *home_path;
95
96 type = lttng_consumer_get_type();
97 is_root = !getuid();
98
99 if (is_root) {
100 if (strlen(health_unix_sock_path) != 0) {
101 goto end;
102 }
103 switch (type) {
104 case LTTNG_CONSUMER_KERNEL:
105 snprintf(health_unix_sock_path, sizeof(health_unix_sock_path),
106 DEFAULT_GLOBAL_KCONSUMER_HEALTH_UNIX_SOCK);
107 break;
108 case LTTNG_CONSUMER64_UST:
109 snprintf(health_unix_sock_path, sizeof(health_unix_sock_path),
110 DEFAULT_GLOBAL_USTCONSUMER64_HEALTH_UNIX_SOCK);
111 break;
112 case LTTNG_CONSUMER32_UST:
113 snprintf(health_unix_sock_path, sizeof(health_unix_sock_path),
114 DEFAULT_GLOBAL_USTCONSUMER32_HEALTH_UNIX_SOCK);
115 break;
116 default:
117 ret = -EINVAL;
118 goto end;
119 }
120 } else {
121 static char *rundir;
122
123 home_path = utils_get_home_dir();
124 if (home_path == NULL) {
125 /* TODO: Add --socket PATH option */
126 ERR("Can't get HOME directory for sockets creation.");
127 ret = -EPERM;
128 goto end;
129 }
130
131 /*
132 * Create rundir from home path. This will create something like
133 * $HOME/.lttng
134 */
135 ret = asprintf(&rundir, DEFAULT_LTTNG_HOME_RUNDIR, home_path);
136 if (ret < 0) {
137 ret = -ENOMEM;
138 goto end;
139 }
140
141 /* Set health check Unix path */
142 if (strlen(health_unix_sock_path) != 0) {
143 goto end;
144 }
145 switch (type) {
146 case LTTNG_CONSUMER_KERNEL:
147 snprintf(health_unix_sock_path, sizeof(health_unix_sock_path),
148 DEFAULT_HOME_KCONSUMER_HEALTH_UNIX_SOCK, rundir);
149 break;
150 case LTTNG_CONSUMER64_UST:
151 snprintf(health_unix_sock_path, sizeof(health_unix_sock_path),
152 DEFAULT_HOME_USTCONSUMER64_HEALTH_UNIX_SOCK, rundir);
153 break;
154 case LTTNG_CONSUMER32_UST:
155 snprintf(health_unix_sock_path, sizeof(health_unix_sock_path),
156 DEFAULT_HOME_USTCONSUMER32_HEALTH_UNIX_SOCK, rundir);
157 break;
158 default:
159 ret = -EINVAL;
160 goto end;
161 }
162 }
163
164end:
165 return ret;
166}
167
168/*
169 * Thread managing health check socket.
170 */
171void *thread_manage_health(void *data)
172{
173 int sock = -1, new_sock = -1, ret, i, pollfd, err = -1;
174 uint32_t revents, nb_fd;
175 struct lttng_poll_event events;
176 struct health_comm_msg msg;
177 struct health_comm_reply reply;
178
179 DBG("[thread] Manage health check started");
180
181 setup_health_path();
182
183 rcu_register_thread();
184
185 /* We might hit an error path before this is created. */
186 lttng_poll_init(&events);
187
188 /* Create unix socket */
189 sock = lttcomm_create_unix_sock(health_unix_sock_path);
190 if (sock < 0) {
191 ERR("Unable to create health check Unix socket");
192 ret = -1;
193 goto error;
194 }
195
196 /*
197 * Set the CLOEXEC flag. Return code is useless because either way, the
198 * show must go on.
199 */
200 (void) utils_set_fd_cloexec(sock);
201
202 ret = lttcomm_listen_unix_sock(sock);
203 if (ret < 0) {
204 goto error;
205 }
206
207 /* Size is set to 1 for the consumer_channel pipe */
208 ret = lttng_poll_create(&events, 2, LTTNG_CLOEXEC);
209 if (ret < 0) {
210 ERR("Poll set creation failed");
211 goto error;
212 }
213
214 ret = lttng_poll_add(&events, health_quit_pipe[0], LPOLLIN);
215 if (ret < 0) {
216 goto error;
217 }
218
219 /* Add the application registration socket */
220 ret = lttng_poll_add(&events, sock, LPOLLIN | LPOLLPRI);
221 if (ret < 0) {
222 goto error;
223 }
224
225 while (1) {
226 DBG("Health check ready");
227
228 /* Inifinite blocking call, waiting for transmission */
229restart:
230 ret = lttng_poll_wait(&events, -1);
231 if (ret < 0) {
232 /*
233 * Restart interrupted system call.
234 */
235 if (errno == EINTR) {
236 goto restart;
237 }
238 goto error;
239 }
240
241 nb_fd = ret;
242
243 for (i = 0; i < nb_fd; i++) {
244 /* Fetch once the poll data */
245 revents = LTTNG_POLL_GETEV(&events, i);
246 pollfd = LTTNG_POLL_GETFD(&events, i);
247
248 /* Thread quit pipe has been closed. Killing thread. */
249 ret = check_health_quit_pipe(pollfd, revents);
250 if (ret) {
251 err = 0;
252 goto exit;
253 }
254
255 /* Event on the registration socket */
256 if (pollfd == sock) {
257 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
258 ERR("Health socket poll error");
259 goto error;
260 }
261 }
262 }
263
264 new_sock = lttcomm_accept_unix_sock(sock);
265 if (new_sock < 0) {
266 goto error;
267 }
268
269 /*
270 * Set the CLOEXEC flag. Return code is useless because either way, the
271 * show must go on.
272 */
273 (void) utils_set_fd_cloexec(new_sock);
274
275 DBG("Receiving data from client for health...");
276 ret = lttcomm_recv_unix_sock(new_sock, (void *)&msg, sizeof(msg));
277 if (ret <= 0) {
278 DBG("Nothing recv() from client... continuing");
279 ret = close(new_sock);
280 if (ret) {
281 PERROR("close");
282 }
283 new_sock = -1;
284 continue;
285 }
286
287 rcu_thread_online();
288
289 assert(msg.cmd == HEALTH_CMD_CHECK);
290
291 switch (msg.component) {
292 case LTTNG_HEALTH_CONSUMERD_CHANNEL:
293 reply.ret_code = health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_CHANNEL);
294 break;
295 case LTTNG_HEALTH_CONSUMERD_METADATA:
296 reply.ret_code = health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_METADATA);
297 break;
298 case LTTNG_HEALTH_CONSUMERD_DATA:
299 reply.ret_code = health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_DATA);
300 break;
301 case LTTNG_HEALTH_CONSUMERD_SESSIOND:
302 reply.ret_code = health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_SESSIOND);
303 break;
304 case LTTNG_HEALTH_CONSUMERD_METADATA_TIMER:
305 reply.ret_code = health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_METADATA_TIMER);
306 break;
307
308 case LTTNG_HEALTH_CONSUMERD_ALL:
309 reply.ret_code =
310 health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_CHANNEL) &&
311 health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_METADATA) &&
312 health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_DATA) &&
313 health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_SESSIOND) &&
314 health_check_state(health_consumerd, HEALTH_CONSUMERD_TYPE_METADATA_TIMER);
315 break;
316 default:
317 reply.ret_code = LTTNG_ERR_UND;
318 break;
319 }
320
321 /*
322 * Flip ret value since 0 is a success and 1 indicates a bad health for
323 * the client where in the sessiond it is the opposite. Again, this is
324 * just to make things easier for us poor developer which enjoy a lot
325 * lazyness.
326 */
327 if (reply.ret_code == 0 || reply.ret_code == 1) {
328 reply.ret_code = !reply.ret_code;
329 }
330
331 DBG2("Health check return value %d", reply.ret_code);
332
333 ret = send_unix_sock(new_sock, (void *) &reply, sizeof(reply));
334 if (ret < 0) {
335 ERR("Failed to send health data back to client");
336 }
337
338 /* End of transmission */
339 ret = close(new_sock);
340 if (ret) {
341 PERROR("close");
342 }
343 new_sock = -1;
344 }
345
346exit:
347error:
348 if (err) {
349 ERR("Health error occurred in %s", __func__);
350 }
351 DBG("Health check thread dying");
352 unlink(health_unix_sock_path);
353 if (sock >= 0) {
354 ret = close(sock);
355 if (ret) {
356 PERROR("close");
357 }
358 }
359
360 lttng_poll_clean(&events);
361
362 rcu_unregister_thread();
363 return NULL;
364}
This page took 0.035229 seconds and 4 git commands to generate.