Fix: relayd: live client not notified of inactive streams
[lttng-tools.git] / src / bin / lttng-sessiond / manage-consumer.cpp
CommitLineData
4ec029ed 1/*
21cf9b6b 2 * Copyright (C) 2011 EfficiOS Inc.
ab5be9fa
MJ
3 * Copyright (C) 2011 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 * Copyright (C) 2013 Jérémie Galarneau <jeremie.galarneau@efficios.com>
4ec029ed 5 *
ab5be9fa 6 * SPDX-License-Identifier: GPL-2.0-only
4ec029ed 7 *
4ec029ed
JG
8 */
9
#include "health-sessiond.hpp"
#include "manage-consumer.hpp"
#include "testpoint.hpp"
#include "thread.hpp"
#include "ust-consumer.hpp"
#include "utils.hpp"

#include <common/pipe.hpp>
#include <common/utils.hpp>

#include <errno.h>
#include <fcntl.h>
#include <signal.h>
4ec029ed 22
namespace {
/*
 * State shared between the launcher of the consumer management thread and
 * the thread itself.
 */
struct thread_notifiers {
	/* Pipe used to ask the thread to quit. */
	struct lttng_pipe *quit_pipe;
	/* Consumer daemon state managed by the thread. */
	struct consumer_data *consumer_data;
	/* Posted once the thread's initialization has succeeded or failed. */
	sem_t ready;
	/* 0 when the initialization succeeded, -1 when it failed. */
	int initialization_result;
};
} /* namespace */
4ec029ed
JG
31
32static void mark_thread_as_ready(struct thread_notifiers *notifiers)
33{
34 DBG("Marking consumer management thread as ready");
35 notifiers->initialization_result = 0;
36 sem_post(&notifiers->ready);
37}
38
28ab034a 39static void mark_thread_intialization_as_failed(struct thread_notifiers *notifiers)
4ec029ed 40{
52c50f8f 41 ERR("Consumer management thread entering error state");
4ec029ed
JG
42 notifiers->initialization_result = -1;
43 sem_post(&notifiers->ready);
44}
45
46static void wait_until_thread_is_ready(struct thread_notifiers *notifiers)
47{
48 DBG("Waiting for consumer management thread to be ready");
49 sem_wait(&notifiers->ready);
50 DBG("Consumer management thread is ready");
51}
52
53/*
54 * This thread manage the consumer error sent back to the session daemon.
55 */
0e0b3d3a 56static void *thread_consumer_management(void *data)
4ec029ed 57{
8a00688e
MJ
58 int sock = -1, i, ret, err = -1, should_quit = 0;
59 uint32_t nb_fd;
4ec029ed
JG
60 enum lttcomm_return_code code;
61 struct lttng_poll_event events;
7966af57 62 struct thread_notifiers *notifiers = (thread_notifiers *) data;
4ec029ed 63 struct consumer_data *consumer_data = notifiers->consumer_data;
8a00688e 64 const auto thread_quit_pipe_fd = lttng_pipe_get_readfd(notifiers->quit_pipe);
cd9adb8b 65 struct consumer_socket *cmd_socket_wrapper = nullptr;
4ec029ed
JG
66
67 DBG("[thread] Manage consumer started");
68
69 rcu_register_thread();
70 rcu_thread_online();
71
412d7227 72 health_register(the_health_sessiond, HEALTH_SESSIOND_TYPE_CONSUMER);
4ec029ed
JG
73
74 health_code_update();
75
76 /*
77 * Pass 3 as size here for the thread quit pipe, consumerd_err_sock and the
78 * metadata_sock. Nothing more will be added to this poll set.
79 */
80 ret = lttng_poll_create(&events, 3, LTTNG_CLOEXEC);
81 if (ret < 0) {
82 mark_thread_intialization_as_failed(notifiers);
83 goto error_poll;
84 }
85
1524f98c 86 ret = lttng_poll_add(&events, thread_quit_pipe_fd, LPOLLIN);
4ec029ed
JG
87 if (ret < 0) {
88 mark_thread_intialization_as_failed(notifiers);
89 goto error;
90 }
91
92 /*
93 * The error socket here is already in a listening state which was done
94 * just before spawning this thread to avoid a race between the consumer
95 * daemon exec trying to connect and the listen() call.
96 */
97 ret = lttng_poll_add(&events, consumer_data->err_sock, LPOLLIN | LPOLLRDHUP);
98 if (ret < 0) {
99 mark_thread_intialization_as_failed(notifiers);
100 goto error;
101 }
102
103 health_code_update();
104
105 /* Infinite blocking call, waiting for transmission */
106 health_poll_entry();
107
108 if (testpoint(sessiond_thread_manage_consumer)) {
109 mark_thread_intialization_as_failed(notifiers);
110 goto error;
111 }
112
113 ret = lttng_poll_wait(&events, -1);
114 health_poll_exit();
115 if (ret < 0) {
116 mark_thread_intialization_as_failed(notifiers);
117 goto error;
118 }
119
120 nb_fd = ret;
121
122 for (i = 0; i < nb_fd; i++) {
123 /* Fetch once the poll data */
8a00688e
MJ
124 const auto revents = LTTNG_POLL_GETEV(&events, i);
125 const auto pollfd = LTTNG_POLL_GETFD(&events, i);
4ec029ed
JG
126
127 health_code_update();
128
8a00688e
MJ
129 /* Activity on thread quit pipe, exiting. */
130 if (pollfd == thread_quit_pipe_fd) {
131 DBG("Activity on thread quit pipe");
4ec029ed
JG
132 err = 0;
133 mark_thread_intialization_as_failed(notifiers);
134 goto exit;
135 } else if (pollfd == consumer_data->err_sock) {
136 /* Event on the registration socket */
137 if (revents & LPOLLIN) {
138 continue;
139 } else if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) {
140 ERR("consumer err socket poll error");
141 mark_thread_intialization_as_failed(notifiers);
142 goto error;
143 } else {
144 ERR("Unexpected poll events %u for sock %d", revents, pollfd);
145 mark_thread_intialization_as_failed(notifiers);
146 goto error;
147 }
148 }
149 }
150
151 sock = lttcomm_accept_unix_sock(consumer_data->err_sock);
152 if (sock < 0) {
153 mark_thread_intialization_as_failed(notifiers);
154 goto error;
155 }
156
157 /*
158 * Set the CLOEXEC flag. Return code is useless because either way, the
159 * show must go on.
160 */
161 (void) utils_set_fd_cloexec(sock);
162
163 health_code_update();
164
165 DBG2("Receiving code from consumer err_sock");
166
167 /* Getting status code from kconsumerd */
28ab034a 168 ret = lttcomm_recv_unix_sock(sock, &code, sizeof(enum lttcomm_return_code));
4ec029ed
JG
169 if (ret <= 0) {
170 mark_thread_intialization_as_failed(notifiers);
171 goto error;
172 }
173
174 health_code_update();
175 if (code != LTTCOMM_CONSUMERD_COMMAND_SOCK_READY) {
176 ERR("consumer error when waiting for SOCK_READY : %s",
28ab034a 177 lttcomm_get_readable_code((lttcomm_return_code) -code));
4ec029ed
JG
178 mark_thread_intialization_as_failed(notifiers);
179 goto error;
180 }
181
182 /* Connect both command and metadata sockets. */
28ab034a
JG
183 consumer_data->cmd_sock = lttcomm_connect_unix_sock(consumer_data->cmd_unix_sock_path);
184 consumer_data->metadata_fd = lttcomm_connect_unix_sock(consumer_data->cmd_unix_sock_path);
4ec029ed
JG
185 if (consumer_data->cmd_sock < 0 || consumer_data->metadata_fd < 0) {
186 PERROR("consumer connect cmd socket");
187 mark_thread_intialization_as_failed(notifiers);
188 goto error;
189 }
190
191 consumer_data->metadata_sock.fd_ptr = &consumer_data->metadata_fd;
192
193 /* Create metadata socket lock. */
64803277 194 consumer_data->metadata_sock.lock = zmalloc<pthread_mutex_t>();
cd9adb8b 195 if (consumer_data->metadata_sock.lock == nullptr) {
4ec029ed
JG
196 PERROR("zmalloc pthread mutex");
197 mark_thread_intialization_as_failed(notifiers);
198 goto error;
199 }
cd9adb8b 200 pthread_mutex_init(consumer_data->metadata_sock.lock, nullptr);
4ec029ed
JG
201
202 DBG("Consumer command socket ready (fd: %d)", consumer_data->cmd_sock);
28ab034a 203 DBG("Consumer metadata socket ready (fd: %d)", consumer_data->metadata_fd);
4ec029ed
JG
204
205 /*
206 * Remove the consumerd error sock since we've established a connection.
207 */
208 ret = lttng_poll_del(&events, consumer_data->err_sock);
209 if (ret < 0) {
210 mark_thread_intialization_as_failed(notifiers);
211 goto error;
212 }
213
214 /* Add new accepted error socket. */
215 ret = lttng_poll_add(&events, sock, LPOLLIN | LPOLLRDHUP);
216 if (ret < 0) {
217 mark_thread_intialization_as_failed(notifiers);
218 goto error;
219 }
220
221 /* Add metadata socket that is successfully connected. */
28ab034a 222 ret = lttng_poll_add(&events, consumer_data->metadata_fd, LPOLLIN | LPOLLRDHUP);
4ec029ed
JG
223 if (ret < 0) {
224 mark_thread_intialization_as_failed(notifiers);
225 goto error;
226 }
227
228 health_code_update();
229
230 /*
09ede842
JG
231 * Transfer the write-end of the channel monitoring pipe to the consumer
232 * by issuing a SET_CHANNEL_MONITOR_PIPE command.
4ec029ed
JG
233 */
234 cmd_socket_wrapper = consumer_allocate_socket(&consumer_data->cmd_sock);
235 if (!cmd_socket_wrapper) {
236 mark_thread_intialization_as_failed(notifiers);
237 goto error;
238 }
239 cmd_socket_wrapper->lock = &consumer_data->lock;
240
09ede842 241 pthread_mutex_lock(cmd_socket_wrapper->lock);
412d7227 242 ret = consumer_init(cmd_socket_wrapper, the_sessiond_uuid);
09ede842
JG
243 if (ret) {
244 ERR("Failed to send sessiond uuid to consumer daemon");
245 mark_thread_intialization_as_failed(notifiers);
246 pthread_mutex_unlock(cmd_socket_wrapper->lock);
247 goto error;
248 }
249 pthread_mutex_unlock(cmd_socket_wrapper->lock);
250
4ec029ed 251 ret = consumer_send_channel_monitor_pipe(cmd_socket_wrapper,
28ab034a 252 consumer_data->channel_monitor_pipe);
4ec029ed
JG
253 if (ret) {
254 mark_thread_intialization_as_failed(notifiers);
255 goto error;
256 }
257
258 /* Discard the socket wrapper as it is no longer needed. */
259 consumer_destroy_socket(cmd_socket_wrapper);
cd9adb8b 260 cmd_socket_wrapper = nullptr;
4ec029ed
JG
261
262 /* The thread is completely initialized, signal that it is ready. */
263 mark_thread_as_ready(notifiers);
264
265 /* Infinite blocking call, waiting for transmission */
cd9adb8b 266 while (true) {
4ec029ed
JG
267 health_code_update();
268
269 /* Exit the thread because the thread quit pipe has been triggered. */
270 if (should_quit) {
271 /* Not a health error. */
272 err = 0;
273 goto exit;
274 }
275
276 health_poll_entry();
277 ret = lttng_poll_wait(&events, -1);
278 health_poll_exit();
279 if (ret < 0) {
280 goto error;
281 }
282
283 nb_fd = ret;
284
285 for (i = 0; i < nb_fd; i++) {
286 /* Fetch once the poll data */
8a00688e
MJ
287 const auto revents = LTTNG_POLL_GETEV(&events, i);
288 const auto pollfd = LTTNG_POLL_GETFD(&events, i);
4ec029ed
JG
289
290 health_code_update();
291
4ec029ed
JG
292 /*
293 * Thread quit pipe has been triggered, flag that we should stop
294 * but continue the current loop to handle potential data from
295 * consumer.
296 */
8a00688e 297 if (pollfd == thread_quit_pipe_fd) {
4ec029ed
JG
298 should_quit = 1;
299 } else if (pollfd == sock) {
300 /* Event on the consumerd socket */
28ab034a
JG
301 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP) &&
302 !(revents & LPOLLIN)) {
4ec029ed
JG
303 ERR("consumer err socket second poll error");
304 goto error;
305 }
306 health_code_update();
307 /* Wait for any kconsumerd error */
28ab034a
JG
308 ret = lttcomm_recv_unix_sock(
309 sock, &code, sizeof(enum lttcomm_return_code));
4ec029ed
JG
310 if (ret <= 0) {
311 ERR("consumer closed the command socket");
312 goto error;
313 }
314
315 ERR("consumer return code : %s",
28ab034a 316 lttcomm_get_readable_code((lttcomm_return_code) -code));
4ec029ed
JG
317
318 goto exit;
319 } else if (pollfd == consumer_data->metadata_fd) {
28ab034a
JG
320 if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP) &&
321 !(revents & LPOLLIN)) {
4ec029ed
JG
322 ERR("consumer err metadata socket second poll error");
323 goto error;
324 }
325 /* UST metadata requests */
28ab034a 326 ret = ust_consumer_metadata_request(&consumer_data->metadata_sock);
4ec029ed
JG
327 if (ret < 0) {
328 ERR("Handling metadata request");
329 goto error;
330 }
331 }
332 /* No need for an else branch all FDs are tested prior. */
333 }
334 health_code_update();
335 }
336
337exit:
338error:
339 /*
340 * We lock here because we are about to close the sockets and some other
341 * thread might be using them so get exclusive access which will abort all
342 * other consumer command by other threads.
343 */
344 pthread_mutex_lock(&consumer_data->lock);
345
346 /* Immediately set the consumerd state to stopped */
347 if (consumer_data->type == LTTNG_CONSUMER_KERNEL) {
412d7227 348 uatomic_set(&the_kernel_consumerd_state, CONSUMER_ERROR);
4ec029ed 349 } else if (consumer_data->type == LTTNG_CONSUMER64_UST ||
28ab034a 350 consumer_data->type == LTTNG_CONSUMER32_UST) {
412d7227 351 uatomic_set(&the_ust_consumerd_state, CONSUMER_ERROR);
4ec029ed
JG
352 } else {
353 /* Code flow error... */
a0377dfe 354 abort();
4ec029ed
JG
355 }
356
357 if (consumer_data->err_sock >= 0) {
358 ret = close(consumer_data->err_sock);
359 if (ret) {
360 PERROR("close");
361 }
362 consumer_data->err_sock = -1;
363 }
364 if (consumer_data->cmd_sock >= 0) {
365 ret = close(consumer_data->cmd_sock);
366 if (ret) {
367 PERROR("close");
368 }
369 consumer_data->cmd_sock = -1;
370 }
28ab034a 371 if (consumer_data->metadata_sock.fd_ptr && *consumer_data->metadata_sock.fd_ptr >= 0) {
4ec029ed
JG
372 ret = close(*consumer_data->metadata_sock.fd_ptr);
373 if (ret) {
374 PERROR("close");
375 }
376 }
377 if (sock >= 0) {
378 ret = close(sock);
379 if (ret) {
380 PERROR("close");
381 }
382 }
383
384 unlink(consumer_data->err_unix_sock_path);
385 unlink(consumer_data->cmd_unix_sock_path);
386 pthread_mutex_unlock(&consumer_data->lock);
387
388 /* Cleanup metadata socket mutex. */
389 if (consumer_data->metadata_sock.lock) {
390 pthread_mutex_destroy(consumer_data->metadata_sock.lock);
391 free(consumer_data->metadata_sock.lock);
392 }
393 lttng_poll_clean(&events);
394
395 if (cmd_socket_wrapper) {
396 consumer_destroy_socket(cmd_socket_wrapper);
397 }
398error_poll:
399 if (err) {
400 health_error();
401 ERR("Health error occurred in %s", __func__);
402 }
412d7227 403 health_unregister(the_health_sessiond);
4ec029ed
JG
404 DBG("consumer thread cleanup completed");
405
406 rcu_thread_offline();
407 rcu_unregister_thread();
408
cd9adb8b 409 return nullptr;
4ec029ed
JG
410}
411
412static bool shutdown_consumer_management_thread(void *data)
413{
7966af57 414 struct thread_notifiers *notifiers = (thread_notifiers *) data;
4ec029ed
JG
415 const int write_fd = lttng_pipe_get_writefd(notifiers->quit_pipe);
416
417 return notify_thread_pipe(write_fd) == 1;
418}
419
420static void cleanup_consumer_management_thread(void *data)
421{
7966af57 422 struct thread_notifiers *notifiers = (thread_notifiers *) data;
4ec029ed
JG
423
424 lttng_pipe_destroy(notifiers->quit_pipe);
425 free(notifiers);
426}
427
428bool launch_consumer_management_thread(struct consumer_data *consumer_data)
429{
430 struct lttng_pipe *quit_pipe;
cd9adb8b 431 struct thread_notifiers *notifiers = nullptr;
4ec029ed
JG
432 struct lttng_thread *thread;
433
64803277 434 notifiers = zmalloc<thread_notifiers>();
4ec029ed 435 if (!notifiers) {
21fa020e
JG
436 goto error_alloc;
437 }
438
439 quit_pipe = lttng_pipe_open(FD_CLOEXEC);
440 if (!quit_pipe) {
4ec029ed
JG
441 goto error;
442 }
443 notifiers->quit_pipe = quit_pipe;
444 notifiers->consumer_data = consumer_data;
445 sem_init(&notifiers->ready, 0, 0);
446
447 thread = lttng_thread_create("Consumer management",
28ab034a
JG
448 thread_consumer_management,
449 shutdown_consumer_management_thread,
450 cleanup_consumer_management_thread,
451 notifiers);
4ec029ed
JG
452 if (!thread) {
453 goto error;
454 }
455 wait_until_thread_is_ready(notifiers);
456 lttng_thread_put(thread);
e0252788 457 return notifiers->initialization_result == 0;
4ec029ed
JG
458error:
459 cleanup_consumer_management_thread(notifiers);
21fa020e 460error_alloc:
4ec029ed
JG
461 return false;
462}
This page took 0.087941 seconds and 4 git commands to generate.