X-Git-Url: https://git.lttng.org/?a=blobdiff_plain;ds=sidebyside;f=src%2Fbin%2Flttng-sessiond%2Fmain.c;h=859223a11216b1d342b0899ebdd4e66bc657d657;hb=840cb59cfc335e2ca44841f6fd57972ac3a623be;hp=e5a22460fc3c6fe2342bf82d015d6da84a85ffd2;hpb=35f90c40cf023393851ac47ad526a0e9e1184503;p=lttng-tools.git diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c index e5a22460f..859223a11 100644 --- a/src/bin/lttng-sessiond/main.c +++ b/src/bin/lttng-sessiond/main.c @@ -222,12 +222,6 @@ enum consumerd_state { static enum consumerd_state ust_consumerd_state; static enum consumerd_state kernel_consumerd_state; -/* Used for the health monitoring of the session daemon. See health.h */ -struct health_state health_thread_cmd; -struct health_state health_thread_app_manage; -struct health_state health_thread_app_reg; -struct health_state health_thread_kernel; - /* * Socket timeout for receiving and sending in seconds. */ @@ -638,7 +632,7 @@ static int update_kernel_stream(struct consumer_data *consumer_data, int fd) struct lttng_ht_iter iter; struct consumer_socket *socket; - + rcu_read_lock(); cds_lfht_for_each_entry(ksess->consumer->socks->ht, &iter.iter, socket, node.node) { /* Code flow error */ @@ -649,9 +643,11 @@ static int update_kernel_stream(struct consumer_data *consumer_data, int fd) channel, ksess); pthread_mutex_unlock(socket->lock); if (ret < 0) { + rcu_read_unlock(); goto error; } } + rcu_read_unlock(); } goto error; } @@ -703,6 +699,8 @@ static void *thread_manage_kernel(void *data) DBG("[thread] Thread manage kernel started"); + health_register(HEALTH_TYPE_KERNEL); + /* * This first step of the while is to clean this structure which could free * non NULL pointers so zero it before the loop. @@ -713,14 +711,14 @@ static void *thread_manage_kernel(void *data) goto error_testpoint; } - health_code_update(&health_thread_kernel); + health_code_update(); if (testpoint(thread_manage_kernel_before_loop)) { goto error_testpoint; } while (1) { - health_code_update(&health_thread_kernel); + health_code_update(); if (update_poll_flag == 1) { /* Clean events object. We are about to populate it again. */ @@ -748,9 +746,9 @@ static void *thread_manage_kernel(void *data) /* Poll infinite value of time */ restart: - health_poll_update(&health_thread_kernel); + health_poll_update(); ret = lttng_poll_wait(&events, -1); - health_poll_update(&health_thread_kernel); + health_poll_update(); if (ret < 0) { /* * Restart interrupted system call. @@ -773,7 +771,7 @@ static void *thread_manage_kernel(void *data) revents = LTTNG_POLL_GETEV(&events, i); pollfd = LTTNG_POLL_GETFD(&events, i); - health_code_update(&health_thread_kernel); + health_code_update(); /* Thread quit pipe has been closed. Killing thread. */ ret = check_thread_quit_pipe(pollfd, revents); @@ -821,12 +819,12 @@ error_testpoint: utils_close_pipe(kernel_poll_pipe); kernel_poll_pipe[0] = kernel_poll_pipe[1] = -1; if (err) { - health_error(&health_thread_kernel); + health_error(); ERR("Health error occurred in %s", __func__); WARN("Kernel thread died unexpectedly. " "Kernel tracing can continue but CPU hotplug is disabled."); } - health_exit(&health_thread_kernel); + health_unregister(); DBG("Kernel thread dying"); return NULL; } @@ -867,6 +865,8 @@ static void *thread_manage_consumer(void *data) DBG("[thread] Manage consumer started"); + health_register(HEALTH_TYPE_CONSUMER); + /* * Since the consumer thread can be spawned at any moment in time, we init * the health to a poll status (1, which is a valid health over time). @@ -884,7 +884,7 @@ static void *thread_manage_consumer(void *data) * In a nutshell, the following poll update to the health state brings back * the state to an even value meaning a code path. */ - health_poll_update(&consumer_data->health); + health_poll_update(); /* * Pass 2 as size here for the thread quit pipe and kconsumerd_err_sock. @@ -905,18 +905,18 @@ static void *thread_manage_consumer(void *data) goto error; } - health_code_update(&consumer_data->health); + health_code_update(); /* Inifinite blocking call, waiting for transmission */ restart: - health_poll_update(&consumer_data->health); + health_poll_update(); if (testpoint(thread_manage_consumer)) { goto error; } ret = lttng_poll_wait(&events, -1); - health_poll_update(&consumer_data->health); + health_poll_update(); if (ret < 0) { /* * Restart interrupted system call. @@ -934,7 +934,7 @@ restart: revents = LTTNG_POLL_GETEV(&events, i); pollfd = LTTNG_POLL_GETFD(&events, i); - health_code_update(&consumer_data->health); + health_code_update(); /* Thread quit pipe has been closed. Killing thread. */ ret = check_thread_quit_pipe(pollfd, revents); @@ -963,7 +963,7 @@ restart: */ (void) utils_set_fd_cloexec(sock); - health_code_update(&consumer_data->health); + health_code_update(); DBG2("Receiving code from consumer err_sock"); @@ -974,7 +974,7 @@ restart: goto error; } - health_code_update(&consumer_data->health); + health_code_update(); if (code == LTTCOMM_CONSUMERD_COMMAND_SOCK_READY) { consumer_data->cmd_sock = @@ -1004,13 +1004,13 @@ restart: goto error; } - health_code_update(&consumer_data->health); + health_code_update(); /* Inifinite blocking call, waiting for transmission */ restart_poll: - health_poll_update(&consumer_data->health); + health_poll_update(); ret = lttng_poll_wait(&events, -1); - health_poll_update(&consumer_data->health); + health_poll_update(); if (ret < 0) { /* * Restart interrupted system call. @@ -1028,7 +1028,7 @@ restart_poll: revents = LTTNG_POLL_GETEV(&events, i); pollfd = LTTNG_POLL_GETFD(&events, i); - health_code_update(&consumer_data->health); + health_code_update(); /* Thread quit pipe has been closed. Killing thread. */ ret = check_thread_quit_pipe(pollfd, revents); @@ -1046,7 +1046,7 @@ restart_poll: } } - health_code_update(&consumer_data->health); + health_code_update(); /* Wait for any kconsumerd error */ ret = lttcomm_recv_unix_sock(sock, &code, @@ -1097,10 +1097,10 @@ error: lttng_poll_clean(&events); error_poll: if (err) { - health_error(&consumer_data->health); + health_error(); ERR("Health error occurred in %s", __func__); } - health_exit(&consumer_data->health); + health_unregister(); DBG("consumer thread cleanup completed"); return NULL; @@ -1121,11 +1121,13 @@ static void *thread_manage_apps(void *data) rcu_register_thread(); rcu_thread_online(); + health_register(HEALTH_TYPE_APP_MANAGE); + if (testpoint(thread_manage_apps)) { goto error_testpoint; } - health_code_update(&health_thread_app_manage); + health_code_update(); ret = create_thread_poll_set(&events, 2); if (ret < 0) { @@ -1141,16 +1143,16 @@ static void *thread_manage_apps(void *data) goto error; } - health_code_update(&health_thread_app_manage); + health_code_update(); while (1) { DBG("Apps thread polling on %d fds", LTTNG_POLL_GETNB(&events)); /* Inifinite blocking call, waiting for transmission */ restart: - health_poll_update(&health_thread_app_manage); + health_poll_update(); ret = lttng_poll_wait(&events, -1); - health_poll_update(&health_thread_app_manage); + health_poll_update(); if (ret < 0) { /* * Restart interrupted system call. @@ -1168,7 +1170,7 @@ static void *thread_manage_apps(void *data) revents = LTTNG_POLL_GETEV(&events, i); pollfd = LTTNG_POLL_GETFD(&events, i); - health_code_update(&health_thread_app_manage); + health_code_update(); /* Thread quit pipe has been closed. Killing thread. */ ret = check_thread_quit_pipe(pollfd, revents); @@ -1192,7 +1194,7 @@ static void *thread_manage_apps(void *data) goto error; } - health_code_update(&health_thread_app_manage); + health_code_update(); /* Register applicaton to the session daemon */ ret = ust_app_register(&ust_cmd.reg_msg, @@ -1203,7 +1205,7 @@ static void *thread_manage_apps(void *data) break; } - health_code_update(&health_thread_app_manage); + health_code_update(); /* * Validate UST version compatibility. @@ -1217,7 +1219,7 @@ static void *thread_manage_apps(void *data) update_ust_app(ust_cmd.sock); } - health_code_update(&health_thread_app_manage); + health_code_update(); ret = ust_app_register_done(ust_cmd.sock); if (ret < 0) { @@ -1248,7 +1250,7 @@ static void *thread_manage_apps(void *data) ust_cmd.sock); } - health_code_update(&health_thread_app_manage); + health_code_update(); break; } @@ -1270,7 +1272,7 @@ static void *thread_manage_apps(void *data) } } - health_code_update(&health_thread_app_manage); + health_code_update(); } } @@ -1289,10 +1291,10 @@ error_testpoint: */ if (err) { - health_error(&health_thread_app_manage); + health_error(); ERR("Health error occurred in %s", __func__); } - health_exit(&health_thread_app_manage); + health_unregister(); DBG("Application communication apps thread cleanup complete"); rcu_thread_offline(); rcu_unregister_thread(); @@ -1389,6 +1391,8 @@ static void *thread_registration_apps(void *data) DBG("[thread] Manage application registration started"); + health_register(HEALTH_TYPE_APP_REG); + if (testpoint(thread_registration_apps)) { goto error_testpoint; } @@ -1426,9 +1430,9 @@ static void *thread_registration_apps(void *data) /* Inifinite blocking call, waiting for transmission */ restart: - health_poll_update(&health_thread_app_reg); + health_poll_update(); ret = lttng_poll_wait(&events, -1); - health_poll_update(&health_thread_app_reg); + health_poll_update(); if (ret < 0) { /* * Restart interrupted system call. @@ -1442,7 +1446,7 @@ static void *thread_registration_apps(void *data) nb_fd = ret; for (i = 0; i < nb_fd; i++) { - health_code_update(&health_thread_app_reg); + health_code_update(); /* Fetch once the poll data */ revents = LTTNG_POLL_GETEV(&events, i); @@ -1494,7 +1498,7 @@ static void *thread_registration_apps(void *data) sock = -1; continue; } - health_code_update(&health_thread_app_reg); + health_code_update(); ret = lttcomm_recv_unix_sock(sock, &ust_cmd->reg_msg, sizeof(struct ust_register_msg)); if (ret < 0 || ret < sizeof(struct ust_register_msg)) { @@ -1512,7 +1516,7 @@ static void *thread_registration_apps(void *data) sock = -1; continue; } - health_code_update(&health_thread_app_reg); + health_code_update(); ust_cmd->sock = sock; sock = -1; @@ -1543,7 +1547,7 @@ static void *thread_registration_apps(void *data) exit: error: if (err) { - health_error(&health_thread_app_reg); + health_error(); ERR("Health error occurred in %s", __func__); } @@ -1571,7 +1575,7 @@ error_listen: error_create_poll: error_testpoint: DBG("UST Registration thread cleanup complete"); - health_exit(&health_thread_app_reg); + health_unregister(); return NULL; } @@ -1946,9 +1950,7 @@ static int check_consumer_health(void) { int ret; - ret = health_check_state(&kconsumer_data.health) && - health_check_state(&ustconsumer32_data.health) && - health_check_state(&ustconsumer64_data.health); + ret = health_check_state(HEALTH_TYPE_CONSUMER); DBG3("Health consumer check %d", ret); @@ -2102,7 +2104,7 @@ static int create_ust_session(struct ltt_session *session, DBG("Creating UST session"); - lus = trace_ust_create_session(session->path, session->id, domain); + lus = trace_ust_create_session(session->path, session->id); if (lus == NULL) { ret = LTTNG_ERR_UST_SESS_FAIL; goto error; @@ -3066,26 +3068,26 @@ restart: switch (msg.component) { case LTTNG_HEALTH_CMD: - reply.ret_code = health_check_state(&health_thread_cmd); + reply.ret_code = health_check_state(HEALTH_TYPE_CMD); break; case LTTNG_HEALTH_APP_MANAGE: - reply.ret_code = health_check_state(&health_thread_app_manage); + reply.ret_code = health_check_state(HEALTH_TYPE_APP_MANAGE); break; case LTTNG_HEALTH_APP_REG: - reply.ret_code = health_check_state(&health_thread_app_reg); + reply.ret_code = health_check_state(HEALTH_TYPE_APP_REG); break; case LTTNG_HEALTH_KERNEL: - reply.ret_code = health_check_state(&health_thread_kernel); + reply.ret_code = health_check_state(HEALTH_TYPE_KERNEL); break; case LTTNG_HEALTH_CONSUMER: reply.ret_code = check_consumer_health(); break; case LTTNG_HEALTH_ALL: reply.ret_code = - health_check_state(&health_thread_app_manage) && - health_check_state(&health_thread_app_reg) && - health_check_state(&health_thread_cmd) && - health_check_state(&health_thread_kernel) && + health_check_state(HEALTH_TYPE_APP_MANAGE) && + health_check_state(HEALTH_TYPE_APP_REG) && + health_check_state(HEALTH_TYPE_CMD) && + health_check_state(HEALTH_TYPE_KERNEL) && check_consumer_health(); break; default: @@ -3160,11 +3162,13 @@ static void *thread_manage_clients(void *data) rcu_register_thread(); + health_register(HEALTH_TYPE_CMD); + if (testpoint(thread_manage_clients)) { goto error_testpoint; } - health_code_update(&health_thread_cmd); + health_code_update(); ret = lttcomm_listen_unix_sock(client_sock); if (ret < 0) { @@ -3197,16 +3201,16 @@ static void *thread_manage_clients(void *data) goto error; } - health_code_update(&health_thread_cmd); + health_code_update(); while (1) { DBG("Accepting client command ..."); /* Inifinite blocking call, waiting for transmission */ restart: - health_poll_update(&health_thread_cmd); + health_poll_update(); ret = lttng_poll_wait(&events, -1); - health_poll_update(&health_thread_cmd); + health_poll_update(); if (ret < 0) { /* * Restart interrupted system call. @@ -3224,7 +3228,7 @@ static void *thread_manage_clients(void *data) revents = LTTNG_POLL_GETEV(&events, i); pollfd = LTTNG_POLL_GETFD(&events, i); - health_code_update(&health_thread_cmd); + health_code_update(); /* Thread quit pipe has been closed. Killing thread. */ ret = check_thread_quit_pipe(pollfd, revents); @@ -3244,7 +3248,7 @@ static void *thread_manage_clients(void *data) DBG("Wait for client response"); - health_code_update(&health_thread_cmd); + health_code_update(); sock = lttcomm_accept_unix_sock(client_sock); if (sock < 0) { @@ -3280,7 +3284,7 @@ static void *thread_manage_clients(void *data) cmd_ctx->llm = NULL; cmd_ctx->session = NULL; - health_code_update(&health_thread_cmd); + health_code_update(); /* * Data is received from the lttng client. The struct @@ -3301,7 +3305,7 @@ static void *thread_manage_clients(void *data) continue; } - health_code_update(&health_thread_cmd); + health_code_update(); // TODO: Validate cmd_ctx including sanity check for // security purpose. @@ -3334,7 +3338,7 @@ static void *thread_manage_clients(void *data) continue; } - health_code_update(&health_thread_cmd); + health_code_update(); DBG("Sending response (size: %d, retcode: %s)", cmd_ctx->lttng_msg_size, @@ -3353,7 +3357,7 @@ static void *thread_manage_clients(void *data) clean_command_ctx(&cmd_ctx); - health_code_update(&health_thread_cmd); + health_code_update(); } exit: @@ -3380,11 +3384,11 @@ error_testpoint: } if (err) { - health_error(&health_thread_cmd); + health_error(); ERR("Health error occurred in %s", __func__); } - health_exit(&health_thread_cmd); + health_unregister(); DBG("Client thread dying"); @@ -4165,26 +4169,6 @@ int main(int argc, char **argv) cmd_init(); - /* Init all health thread counters. */ - health_init(&health_thread_cmd); - health_init(&health_thread_kernel); - health_init(&health_thread_app_manage); - health_init(&health_thread_app_reg); - - /* - * Init health counters of the consumer thread. We do a quick hack here to - * the state of the consumer health is fine even if the thread is not - * started. Once the thread starts, the health state is updated with a poll - * value to set a health code path. This is simply to ease our life and has - * no cost what so ever. - */ - health_init(&kconsumer_data.health); - health_poll_update(&kconsumer_data.health); - health_init(&ustconsumer32_data.health); - health_poll_update(&ustconsumer32_data.health); - health_init(&ustconsumer64_data.health); - health_poll_update(&ustconsumer64_data.health); - /* Check for the application socket timeout env variable. */ env_app_timeout = getenv(DEFAULT_APP_SOCKET_TIMEOUT_ENV); if (env_app_timeout) {