From: David Goulet Date: Thu, 19 Apr 2012 14:58:42 +0000 (-0400) Subject: Fix: report error to client on consumerd error X-Git-Tag: v2.1.0-rc1~141 X-Git-Url: https://git.lttng.org/?p=lttng-tools.git;a=commitdiff_plain;h=5c827ce0dce140b121032837510f89cb70d1650d;hp=84c3304d2e2f8114fee99e7bd394094a30583dba Fix: report error to client on consumerd error Adds a consumer daemon state flag that allows us to know the state of the consumer during client command processing. Please refer to the comment in lttng-sessiond/main.c above the definition of those flags for more information and a possible race condition issue. This is a quick fix for bug #137 that will be merged in 2.0-stable branch. Signed-off-by: David Goulet --- diff --git a/doc/man/lttng.1 b/doc/man/lttng.1 index 5c873d797..f13d5d341 100644 --- a/doc/man/lttng.1 +++ b/doc/man/lttng.1 @@ -639,6 +639,12 @@ Tracing already started .IP "81" Tracing already stopped + +.IP "98" +No UST consumer detected + +.IP "99" +No Kernel consumer detected .PP .SH "ENVIRONMENT VARIABLES" diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c index 3c917b25f..7327c3cb2 100644 --- a/src/bin/lttng-sessiond/main.c +++ b/src/bin/lttng-sessiond/main.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -179,6 +180,40 @@ static const char *consumerd64_bin = CONFIG_CONSUMERD64_BIN; static const char *consumerd32_libdir = CONFIG_CONSUMERD32_LIBDIR; static const char *consumerd64_libdir = CONFIG_CONSUMERD64_LIBDIR; +/* + * Consumer daemon state which is changed when spawning it, killing it or in + * case of a fatal error. + */ +enum consumerd_state { + CONSUMER_STARTED = 1, + CONSUMER_STOPPED = 2, + CONSUMER_ERROR = 3, +}; + +/* + * This consumer daemon state is used to validate if a client command will be + * able to reach the consumer. If not, the client is informed. For instance, + * doing a "lttng start" when the consumer state is set to ERROR will return an + * error to the client. + * + * The following example shows a possible race condition of this scheme: + * + * consumer thread error happens + * client cmd arrives + * client cmd checks state -> still OK + * consumer thread exit, sets error + * client cmd try to talk to consumer + * ... + * + * However, since the consumer is a different daemon, we have no way of making + * sure the command will reach it safely even with this state flag. This is why + * we consider that up to the state validation during command processing, the + * command is safe. After that, we can not guarantee the correctness of the + * client request vis-a-vis the consumer. + */ +static enum consumerd_state ust_consumerd_state; +static enum consumerd_state kernel_consumerd_state; + static void setup_consumerd_path(void) { @@ -450,7 +485,6 @@ static void cleanup(void) if (ret) { PERROR("close"); } - } } for (i = 0; i < 2; i++) { @@ -1090,6 +1124,17 @@ restart_poll: ERR("consumer return code : %s", lttcomm_get_readable_code(-code)); error: + /* Immediately set the consumerd state to stopped */ + if (consumer_data->type == LTTNG_CONSUMER_KERNEL) { + uatomic_set(&kernel_consumerd_state, CONSUMER_ERROR); + } else if (consumer_data->type == LTTNG_CONSUMER64_UST || + consumer_data->type == LTTNG_CONSUMER32_UST) { + uatomic_set(&ust_consumerd_state, CONSUMER_ERROR); + } else { + /* Code flow error... */ + assert(0); + } + if (consumer_data->err_sock >= 0) { ret = close(consumer_data->err_sock); if (ret) { @@ -3361,6 +3406,12 @@ static int process_client_msg(struct command_ctx *cmd_ctx) } } + /* Consumer is in an ERROR state. Report back to client */ + if (uatomic_read(&kernel_consumerd_state) == CONSUMER_ERROR) { + ret = LTTCOMM_NO_KERNCONSUMERD; + goto error; + } + /* Need a session for kernel command */ if (need_tracing_session) { if (cmd_ctx->session->kernel_session == NULL) { @@ -3381,13 +3432,21 @@ static int process_client_msg(struct command_ctx *cmd_ctx) ret = LTTCOMM_KERN_CONSUMER_FAIL; goto error; } + uatomic_set(&kernel_consumerd_state, CONSUMER_STARTED); } else { pthread_mutex_unlock(&kconsumer_data.pid_mutex); } } + break; case LTTNG_DOMAIN_UST: { + /* Consumer is in an ERROR state. Report back to client */ + if (uatomic_read(&ust_consumerd_state) == CONSUMER_ERROR) { + ret = LTTCOMM_NO_USTCONSUMERD; + goto error; + } + if (need_tracing_session) { if (cmd_ctx->session->ust_session == NULL) { ret = create_ust_session(cmd_ctx->session, @@ -3411,6 +3470,7 @@ static int process_client_msg(struct command_ctx *cmd_ctx) } ust_consumerd64_fd = ustconsumer64_data.cmd_sock; + uatomic_set(&ust_consumerd_state, CONSUMER_STARTED); } else { pthread_mutex_unlock(&ustconsumer64_data.pid_mutex); } @@ -3425,7 +3485,9 @@ static int process_client_msg(struct command_ctx *cmd_ctx) ust_consumerd32_fd = -EINVAL; goto error; } + ust_consumerd32_fd = ustconsumer32_data.cmd_sock; + uatomic_set(&ust_consumerd_state, CONSUMER_STARTED); } else { pthread_mutex_unlock(&ustconsumer32_data.pid_mutex); } @@ -3437,6 +3499,25 @@ static int process_client_msg(struct command_ctx *cmd_ctx) } skip_domain: + /* Validate consumer daemon state when start/stop trace command */ + if (cmd_ctx->lsm->cmd_type == LTTNG_START_TRACE || + cmd_ctx->lsm->cmd_type == LTTNG_STOP_TRACE) { + switch (cmd_ctx->lsm->domain.type) { + case LTTNG_DOMAIN_UST: + if (uatomic_read(&ust_consumerd_state) != CONSUMER_STARTED) { + ret = LTTCOMM_NO_USTCONSUMERD; + goto error; + } + break; + case LTTNG_DOMAIN_KERNEL: + if (uatomic_read(&kernel_consumerd_state) != CONSUMER_STARTED) { + ret = LTTCOMM_NO_KERNCONSUMERD; + goto error; + } + break; + } + } + /* * Check that the UID or GID match that of the tracing session. * The root user can interact with all sessions. @@ -4504,6 +4585,10 @@ int main(int argc, char **argv) } } + /* Set consumer initial state */ + kernel_consumerd_state = CONSUMER_STOPPED; + ust_consumerd_state = CONSUMER_STOPPED; + DBG("Client socket path %s", client_unix_sock_path); DBG("Application socket path %s", apps_unix_sock_path); DBG("LTTng run directory path: %s", rundir); diff --git a/src/common/sessiond-comm/sessiond-comm.c b/src/common/sessiond-comm/sessiond-comm.c index 11319f333..6ad4a3775 100644 --- a/src/common/sessiond-comm/sessiond-comm.c +++ b/src/common/sessiond-comm/sessiond-comm.c @@ -123,6 +123,8 @@ static const char *lttcomm_readable_code[] = { [ LTTCOMM_ERR_INDEX(CONSUMERD_SPLICE_ESPIPE) ] = "consumerd splice ESPIPE", [ LTTCOMM_ERR_INDEX(LTTCOMM_NO_EVENT) ] = "Event not found", [ LTTCOMM_ERR_INDEX(LTTCOMM_INVALID) ] = "Invalid parameter", + [ LTTCOMM_ERR_INDEX(LTTCOMM_NO_USTCONSUMERD) ] = "No UST consumer detected", + [ LTTCOMM_ERR_INDEX(LTTCOMM_NO_KERNCONSUMERD) ] = "No kernel consumer detected", }; /* diff --git a/src/common/sessiond-comm/sessiond-comm.h b/src/common/sessiond-comm/sessiond-comm.h index 6cc98d542..4d31289a4 100644 --- a/src/common/sessiond-comm/sessiond-comm.h +++ b/src/common/sessiond-comm/sessiond-comm.h @@ -157,6 +157,8 @@ enum lttcomm_return_code { CONSUMERD_SPLICE_ENOMEM, /* ENOMEM from splice(2) */ CONSUMERD_SPLICE_ESPIPE, /* ESPIPE from splice(2) */ LTTCOMM_INVALID, /* Invalid parameter */ + LTTCOMM_NO_USTCONSUMERD, /* No UST consumer detected */ + LTTCOMM_NO_KERNCONSUMERD, /* No Kernel consumer detected */ /* MUST be last element */ LTTCOMM_NR, /* Last element */