X-Git-Url: https://git.lttng.org/?p=lttng-tools.git;a=blobdiff_plain;f=src%2Fbin%2Flttng-sessiond%2Fmain.c;h=7327c3cb2262ddf4337b46c08f2d02160f1136e7;hp=4d8851ef813168a8445ad77ef12c1fdc838101d9;hb=5c827ce0dce140b121032837510f89cb70d1650d;hpb=256a55760810f90bdcbd3d8bc13963677ba49b9d diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c index 4d8851ef8..7327c3cb2 100644 --- a/src/bin/lttng-sessiond/main.c +++ b/src/bin/lttng-sessiond/main.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -54,6 +55,7 @@ #include "shm.h" #include "ust-ctl.h" #include "utils.h" +#include "fd-limit.h" #define CONSUMERD_FILE "lttng-consumerd" @@ -178,6 +180,40 @@ static const char *consumerd64_bin = CONFIG_CONSUMERD64_BIN; static const char *consumerd32_libdir = CONFIG_CONSUMERD32_LIBDIR; static const char *consumerd64_libdir = CONFIG_CONSUMERD64_LIBDIR; +/* + * Consumer daemon state which is changed when spawning it, killing it or in + * case of a fatal error. + */ +enum consumerd_state { + CONSUMER_STARTED = 1, + CONSUMER_STOPPED = 2, + CONSUMER_ERROR = 3, +}; + +/* + * This consumer daemon state is used to validate if a client command will be + * able to reach the consumer. If not, the client is informed. For instance, + * doing a "lttng start" when the consumer state is set to ERROR will return an + * error to the client. + * + * The following example shows a possible race condition of this scheme: + * + * consumer thread error happens + * client cmd arrives + * client cmd checks state -> still OK + * consumer thread exit, sets error + * client cmd try to talk to consumer + * ... + * + * However, since the consumer is a different daemon, we have no way of making + * sure the command will reach it safely even with this state flag. This is why + * we consider that up to the state validation during command processing, the + * command is safe. After that, we can not guarantee the correctness of the + * client request vis-a-vis the consumer. + */ +static enum consumerd_state ust_consumerd_state; +static enum consumerd_state kernel_consumerd_state; + static void setup_consumerd_path(void) { @@ -449,7 +485,6 @@ static void cleanup(void) if (ret) { PERROR("close"); } - } } for (i = 0; i < 2; i++) { @@ -1089,6 +1124,17 @@ restart_poll: ERR("consumer return code : %s", lttcomm_get_readable_code(-code)); error: + /* Immediately set the consumerd state to stopped */ + if (consumer_data->type == LTTNG_CONSUMER_KERNEL) { + uatomic_set(&kernel_consumerd_state, CONSUMER_ERROR); + } else if (consumer_data->type == LTTNG_CONSUMER64_UST || + consumer_data->type == LTTNG_CONSUMER32_UST) { + uatomic_set(&ust_consumerd_state, CONSUMER_ERROR); + } else { + /* Code flow error... */ + assert(0); + } + if (consumer_data->err_sock >= 0) { ret = close(consumer_data->err_sock); if (ret) { @@ -1424,6 +1470,17 @@ static void *thread_registration_apps(void *data) * Using message-based transmissions to ensure we don't * have to deal with partially received messages. */ + ret = lttng_fd_get(LTTNG_FD_APPS, 1); + if (ret < 0) { + ERR("Exhausted file descriptors allowed for applications."); + free(ust_cmd); + ret = close(sock); + if (ret) { + PERROR("close"); + } + sock = -1; + continue; + } ret = lttcomm_recv_unix_sock(sock, &ust_cmd->reg_msg, sizeof(struct ust_register_msg)); if (ret < 0 || ret < sizeof(struct ust_register_msg)) { @@ -1437,6 +1494,7 @@ static void *thread_registration_apps(void *data) if (ret) { PERROR("close"); } + lttng_fd_put(LTTNG_FD_APPS, 1); sock = -1; continue; } @@ -1482,6 +1540,7 @@ error: if (ret) { PERROR("close"); } + lttng_fd_put(LTTNG_FD_APPS, 1); } unlink(apps_unix_sock_path); @@ -3347,6 +3406,12 @@ static int process_client_msg(struct command_ctx *cmd_ctx) } } + /* Consumer is in an ERROR state. Report back to client */ + if (uatomic_read(&kernel_consumerd_state) == CONSUMER_ERROR) { + ret = LTTCOMM_NO_KERNCONSUMERD; + goto error; + } + /* Need a session for kernel command */ if (need_tracing_session) { if (cmd_ctx->session->kernel_session == NULL) { @@ -3367,13 +3432,21 @@ static int process_client_msg(struct command_ctx *cmd_ctx) ret = LTTCOMM_KERN_CONSUMER_FAIL; goto error; } + uatomic_set(&kernel_consumerd_state, CONSUMER_STARTED); } else { pthread_mutex_unlock(&kconsumer_data.pid_mutex); } } + break; case LTTNG_DOMAIN_UST: { + /* Consumer is in an ERROR state. Report back to client */ + if (uatomic_read(&ust_consumerd_state) == CONSUMER_ERROR) { + ret = LTTCOMM_NO_USTCONSUMERD; + goto error; + } + if (need_tracing_session) { if (cmd_ctx->session->ust_session == NULL) { ret = create_ust_session(cmd_ctx->session, @@ -3397,6 +3470,7 @@ static int process_client_msg(struct command_ctx *cmd_ctx) } ust_consumerd64_fd = ustconsumer64_data.cmd_sock; + uatomic_set(&ust_consumerd_state, CONSUMER_STARTED); } else { pthread_mutex_unlock(&ustconsumer64_data.pid_mutex); } @@ -3411,7 +3485,9 @@ static int process_client_msg(struct command_ctx *cmd_ctx) ust_consumerd32_fd = -EINVAL; goto error; } + ust_consumerd32_fd = ustconsumer32_data.cmd_sock; + uatomic_set(&ust_consumerd_state, CONSUMER_STARTED); } else { pthread_mutex_unlock(&ustconsumer32_data.pid_mutex); } @@ -3423,6 +3499,25 @@ static int process_client_msg(struct command_ctx *cmd_ctx) } skip_domain: + /* Validate consumer daemon state when start/stop trace command */ + if (cmd_ctx->lsm->cmd_type == LTTNG_START_TRACE || + cmd_ctx->lsm->cmd_type == LTTNG_STOP_TRACE) { + switch (cmd_ctx->lsm->domain.type) { + case LTTNG_DOMAIN_UST: + if (uatomic_read(&ust_consumerd_state) != CONSUMER_STARTED) { + ret = LTTCOMM_NO_USTCONSUMERD; + goto error; + } + break; + case LTTNG_DOMAIN_KERNEL: + if (uatomic_read(&kernel_consumerd_state) != CONSUMER_STARTED) { + ret = LTTCOMM_NO_KERNCONSUMERD; + goto error; + } + break; + } + } + /* * Check that the UID or GID match that of the tracing session. * The root user can interact with all sessions. @@ -4490,6 +4585,10 @@ int main(int argc, char **argv) } } + /* Set consumer initial state */ + kernel_consumerd_state = CONSUMER_STOPPED; + ust_consumerd_state = CONSUMER_STOPPED; + DBG("Client socket path %s", client_unix_sock_path); DBG("Application socket path %s", apps_unix_sock_path); DBG("LTTng run directory path: %s", rundir); @@ -4556,6 +4655,8 @@ int main(int argc, char **argv) /* Set ulimit for open files */ set_ulimit(); } + /* init lttng_fd tracking must be done after set_ulimit. */ + lttng_fd_init(); ret = set_consumer_sockets(&ustconsumer64_data, rundir); if (ret < 0) {