X-Git-Url: https://git.lttng.org/?p=lttng-tools.git;a=blobdiff_plain;f=src%2Fbin%2Flttng-sessiond%2Fmain.c;h=452b7ba7eff94e1254775a12ae7f99d4add40108;hp=8d0a841ed8e71cc88a2f5a7d688ecbf1e3c49d3e;hb=ef599319218f03ff160778e577cd8389a165f47c;hpb=d14d33bf091e72b23b1f90ea18a0a01bed098b76 diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c index 8d0a841ed..452b7ba7e 100644 --- a/src/bin/lttng-sessiond/main.c +++ b/src/bin/lttng-sessiond/main.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include @@ -43,38 +43,22 @@ #include #include #include +#include #include "lttng-sessiond.h" #include "channel.h" #include "context.h" #include "event.h" -#include "futex.h" #include "kernel.h" +#include "kernel-consumer.h" #include "modprobe.h" #include "shm.h" #include "ust-ctl.h" #include "utils.h" +#include "fd-limit.h" #define CONSUMERD_FILE "lttng-consumerd" -struct consumer_data { - enum lttng_consumer_type type; - - pthread_t thread; /* Worker thread interacting with the consumer */ - sem_t sem; - - /* Mutex to control consumerd pid assignation */ - pthread_mutex_t pid_mutex; - pid_t pid; - - int err_sock; - int cmd_sock; - - /* consumer error and command Unix socket path */ - char err_unix_sock_path[PATH_MAX]; - char cmd_unix_sock_path[PATH_MAX]; -}; - /* Const values */ const char default_home_dir[] = DEFAULT_HOME_DIR; const char default_tracing_group[] = DEFAULT_TRACING_GROUP; @@ -178,6 +162,40 @@ static const char *consumerd64_bin = CONFIG_CONSUMERD64_BIN; static const char *consumerd32_libdir = CONFIG_CONSUMERD32_LIBDIR; static const char *consumerd64_libdir = CONFIG_CONSUMERD64_LIBDIR; +/* + * Consumer daemon state which is changed when spawning it, killing it or in + * case of a fatal error. + */ +enum consumerd_state { + CONSUMER_STARTED = 1, + CONSUMER_STOPPED = 2, + CONSUMER_ERROR = 3, +}; + +/* + * This consumer daemon state is used to validate if a client command will be + * able to reach the consumer. If not, the client is informed. For instance, + * doing a "lttng start" when the consumer state is set to ERROR will return an + * error to the client. + * + * The following example shows a possible race condition of this scheme: + * + * consumer thread error happens + * client cmd arrives + * client cmd checks state -> still OK + * consumer thread exit, sets error + * client cmd try to talk to consumer + * ... + * + * However, since the consumer is a different daemon, we have no way of making + * sure the command will reach it safely even with this state flag. This is why + * we consider that up to the state validation during command processing, the + * command is safe. After that, we can not guarantee the correctness of the + * client request vis-a-vis the consumer. + */ +static enum consumerd_state ust_consumerd_state; +static enum consumerd_state kernel_consumerd_state; + static void setup_consumerd_path(void) { @@ -389,7 +407,7 @@ static void stop_threads(void) */ static void cleanup(void) { - int ret, i; + int ret; char *cmd; struct ltt_session *sess, *stmp; @@ -439,35 +457,9 @@ static void cleanup(void) DBG("Unloading kernel modules"); modprobe_remove_lttng_all(); } - - /* - * Closing all pipes used for communication between threads. - */ - for (i = 0; i < 2; i++) { - if (kernel_poll_pipe[i] >= 0) { - ret = close(kernel_poll_pipe[i]); - if (ret) { - PERROR("close"); - } - - } - } - for (i = 0; i < 2; i++) { - if (thread_quit_pipe[i] >= 0) { - ret = close(thread_quit_pipe[i]); - if (ret) { - PERROR("close"); - } - } - } - for (i = 0; i < 2; i++) { - if (apps_cmd_pipe[i] >= 0) { - ret = close(apps_cmd_pipe[i]); - if (ret) { - PERROR("close"); - } - } - } + utils_close_pipe(kernel_poll_pipe); + utils_close_pipe(thread_quit_pipe); + utils_close_pipe(apps_cmd_pipe); /* */ DBG("%c[%d;%dm*** assert failed :-) *** ==> %c[%dm%c[%d;%dm" @@ -509,139 +501,6 @@ static void clean_command_ctx(struct command_ctx **cmd_ctx) } } -/* - * Send all stream fds of kernel channel to the consumer. - */ -static int send_kconsumer_channel_streams(struct consumer_data *consumer_data, - int sock, struct ltt_kernel_channel *channel, - uid_t uid, gid_t gid) -{ - int ret; - struct ltt_kernel_stream *stream; - struct lttcomm_consumer_msg lkm; - - DBG("Sending streams of channel %s to kernel consumer", - channel->channel->name); - - /* Send channel */ - lkm.cmd_type = LTTNG_CONSUMER_ADD_CHANNEL; - lkm.u.channel.channel_key = channel->fd; - lkm.u.channel.max_sb_size = channel->channel->attr.subbuf_size; - lkm.u.channel.mmap_len = 0; /* for kernel */ - DBG("Sending channel %d to consumer", lkm.u.channel.channel_key); - ret = lttcomm_send_unix_sock(sock, &lkm, sizeof(lkm)); - if (ret < 0) { - PERROR("send consumer channel"); - goto error; - } - - /* Send streams */ - cds_list_for_each_entry(stream, &channel->stream_list.head, list) { - if (!stream->fd) { - continue; - } - lkm.cmd_type = LTTNG_CONSUMER_ADD_STREAM; - lkm.u.stream.channel_key = channel->fd; - lkm.u.stream.stream_key = stream->fd; - lkm.u.stream.state = stream->state; - lkm.u.stream.output = channel->channel->attr.output; - lkm.u.stream.mmap_len = 0; /* for kernel */ - lkm.u.stream.uid = uid; - lkm.u.stream.gid = gid; - strncpy(lkm.u.stream.path_name, stream->pathname, PATH_MAX - 1); - lkm.u.stream.path_name[PATH_MAX - 1] = '\0'; - DBG("Sending stream %d to consumer", lkm.u.stream.stream_key); - ret = lttcomm_send_unix_sock(sock, &lkm, sizeof(lkm)); - if (ret < 0) { - PERROR("send consumer stream"); - goto error; - } - ret = lttcomm_send_fds_unix_sock(sock, &stream->fd, 1); - if (ret < 0) { - PERROR("send consumer stream ancillary data"); - goto error; - } - } - - DBG("consumer channel streams sent"); - - return 0; - -error: - return ret; -} - -/* - * Send all stream fds of the kernel session to the consumer. - */ -static int send_kconsumer_session_streams(struct consumer_data *consumer_data, - struct ltt_kernel_session *session) -{ - int ret; - struct ltt_kernel_channel *chan; - struct lttcomm_consumer_msg lkm; - int sock = session->consumer_fd; - - DBG("Sending metadata stream fd"); - - /* Extra protection. It's NOT supposed to be set to -1 at this point */ - if (session->consumer_fd < 0) { - session->consumer_fd = consumer_data->cmd_sock; - } - - if (session->metadata_stream_fd >= 0) { - /* Send metadata channel fd */ - lkm.cmd_type = LTTNG_CONSUMER_ADD_CHANNEL; - lkm.u.channel.channel_key = session->metadata->fd; - lkm.u.channel.max_sb_size = session->metadata->conf->attr.subbuf_size; - lkm.u.channel.mmap_len = 0; /* for kernel */ - DBG("Sending metadata channel %d to consumer", lkm.u.stream.stream_key); - ret = lttcomm_send_unix_sock(sock, &lkm, sizeof(lkm)); - if (ret < 0) { - PERROR("send consumer channel"); - goto error; - } - - /* Send metadata stream fd */ - lkm.cmd_type = LTTNG_CONSUMER_ADD_STREAM; - lkm.u.stream.channel_key = session->metadata->fd; - lkm.u.stream.stream_key = session->metadata_stream_fd; - lkm.u.stream.state = LTTNG_CONSUMER_ACTIVE_STREAM; - lkm.u.stream.output = DEFAULT_KERNEL_CHANNEL_OUTPUT; - lkm.u.stream.mmap_len = 0; /* for kernel */ - lkm.u.stream.uid = session->uid; - lkm.u.stream.gid = session->gid; - strncpy(lkm.u.stream.path_name, session->metadata->pathname, PATH_MAX - 1); - lkm.u.stream.path_name[PATH_MAX - 1] = '\0'; - DBG("Sending metadata stream %d to consumer", lkm.u.stream.stream_key); - ret = lttcomm_send_unix_sock(sock, &lkm, sizeof(lkm)); - if (ret < 0) { - PERROR("send consumer stream"); - goto error; - } - ret = lttcomm_send_fds_unix_sock(sock, &session->metadata_stream_fd, 1); - if (ret < 0) { - PERROR("send consumer stream"); - goto error; - } - } - - cds_list_for_each_entry(chan, &session->channel_list.head, list) { - ret = send_kconsumer_channel_streams(consumer_data, sock, chan, - session->uid, session->gid); - if (ret < 0) { - goto error; - } - } - - DBG("consumer fds (metadata and channel streams) sent"); - - return 0; - -error: - return ret; -} - /* * Notify UST applications using the shm mmap futex. */ @@ -783,7 +642,7 @@ static int update_kernel_stream(struct consumer_data *consumer_data, int fd) * stream fds. */ if (session->kernel_session->consumer_fds_sent == 1) { - ret = send_kconsumer_channel_streams(consumer_data, + ret = kernel_consumer_send_channel_stream(consumer_data, session->kernel_session->consumer_fd, channel, session->uid, session->gid); if (ret < 0) { @@ -1089,6 +948,17 @@ restart_poll: ERR("consumer return code : %s", lttcomm_get_readable_code(-code)); error: + /* Immediately set the consumerd state to stopped */ + if (consumer_data->type == LTTNG_CONSUMER_KERNEL) { + uatomic_set(&kernel_consumerd_state, CONSUMER_ERROR); + } else if (consumer_data->type == LTTNG_CONSUMER64_UST || + consumer_data->type == LTTNG_CONSUMER32_UST) { + uatomic_set(&ust_consumerd_state, CONSUMER_ERROR); + } else { + /* Code flow error... */ + assert(0); + } + if (consumer_data->err_sock >= 0) { ret = close(consumer_data->err_sock); if (ret) { @@ -1424,6 +1294,17 @@ static void *thread_registration_apps(void *data) * Using message-based transmissions to ensure we don't * have to deal with partially received messages. */ + ret = lttng_fd_get(LTTNG_FD_APPS, 1); + if (ret < 0) { + ERR("Exhausted file descriptors allowed for applications."); + free(ust_cmd); + ret = close(sock); + if (ret) { + PERROR("close"); + } + sock = -1; + continue; + } ret = lttcomm_recv_unix_sock(sock, &ust_cmd->reg_msg, sizeof(struct ust_register_msg)); if (ret < 0 || ret < sizeof(struct ust_register_msg)) { @@ -1437,6 +1318,7 @@ static void *thread_registration_apps(void *data) if (ret) { PERROR("close"); } + lttng_fd_put(LTTNG_FD_APPS, 1); sock = -1; continue; } @@ -1482,6 +1364,7 @@ error: if (ret) { PERROR("close"); } + lttng_fd_put(LTTNG_FD_APPS, 1); } unlink(apps_unix_sock_path); @@ -1873,7 +1756,7 @@ static int init_kernel_tracing(struct ltt_kernel_session *session) session->consumer_fd = kconsumer_data.cmd_sock; } - ret = send_kconsumer_session_streams(&kconsumer_data, session); + ret = kernel_consumer_send_session(&kconsumer_data, session); if (ret < 0) { ret = LTTCOMM_KERN_CONSUMER_FAIL; goto error; @@ -2832,6 +2715,36 @@ error: return -ret; } +/* + * Command LTTNG_LIST_TRACEPOINT_FIELDS processed by the client thread. + */ +static ssize_t cmd_list_tracepoint_fields(int domain, + struct lttng_event_field **fields) +{ + int ret; + ssize_t nb_fields = 0; + + switch (domain) { + case LTTNG_DOMAIN_UST: + nb_fields = ust_app_list_event_fields(fields); + if (nb_fields < 0) { + ret = LTTCOMM_UST_LIST_FAIL; + goto error; + } + break; + case LTTNG_DOMAIN_KERNEL: + default: /* fall-through */ + ret = LTTCOMM_UND; + goto error; + } + + return nb_fields; + +error: + /* Return negative value to differentiate return code */ + return -ret; +} + /* * Command LTTNG_START_TRACE processed by the client thread. */ @@ -3280,6 +3193,7 @@ static int process_client_msg(struct command_ctx *cmd_ctx) switch(cmd_ctx->lsm->cmd_type) { case LTTNG_LIST_SESSIONS: case LTTNG_LIST_TRACEPOINTS: + case LTTNG_LIST_TRACEPOINT_FIELDS: case LTTNG_LIST_DOMAINS: case LTTNG_LIST_CHANNELS: case LTTNG_LIST_EVENTS: @@ -3299,13 +3213,18 @@ static int process_client_msg(struct command_ctx *cmd_ctx) case LTTNG_CALIBRATE: case LTTNG_LIST_SESSIONS: case LTTNG_LIST_TRACEPOINTS: + case LTTNG_LIST_TRACEPOINT_FIELDS: need_tracing_session = 0; break; default: DBG("Getting session %s by name", cmd_ctx->lsm->session.name); + /* + * We keep the session list lock across _all_ commands + * for now, because the per-session lock does not + * handle teardown properly. + */ session_lock_list(); cmd_ctx->session = session_find_by_name(cmd_ctx->lsm->session.name); - session_unlock_list(); if (cmd_ctx->session == NULL) { if (cmd_ctx->lsm->session.name != NULL) { ret = LTTCOMM_SESS_NOT_FOUND; @@ -3343,6 +3262,12 @@ static int process_client_msg(struct command_ctx *cmd_ctx) } } + /* Consumer is in an ERROR state. Report back to client */ + if (uatomic_read(&kernel_consumerd_state) == CONSUMER_ERROR) { + ret = LTTCOMM_NO_KERNCONSUMERD; + goto error; + } + /* Need a session for kernel command */ if (need_tracing_session) { if (cmd_ctx->session->kernel_session == NULL) { @@ -3363,13 +3288,21 @@ static int process_client_msg(struct command_ctx *cmd_ctx) ret = LTTCOMM_KERN_CONSUMER_FAIL; goto error; } + uatomic_set(&kernel_consumerd_state, CONSUMER_STARTED); } else { pthread_mutex_unlock(&kconsumer_data.pid_mutex); } } + break; case LTTNG_DOMAIN_UST: { + /* Consumer is in an ERROR state. Report back to client */ + if (uatomic_read(&ust_consumerd_state) == CONSUMER_ERROR) { + ret = LTTCOMM_NO_USTCONSUMERD; + goto error; + } + if (need_tracing_session) { if (cmd_ctx->session->ust_session == NULL) { ret = create_ust_session(cmd_ctx->session, @@ -3393,6 +3326,7 @@ static int process_client_msg(struct command_ctx *cmd_ctx) } ust_consumerd64_fd = ustconsumer64_data.cmd_sock; + uatomic_set(&ust_consumerd_state, CONSUMER_STARTED); } else { pthread_mutex_unlock(&ustconsumer64_data.pid_mutex); } @@ -3407,7 +3341,9 @@ static int process_client_msg(struct command_ctx *cmd_ctx) ust_consumerd32_fd = -EINVAL; goto error; } + ust_consumerd32_fd = ustconsumer32_data.cmd_sock; + uatomic_set(&ust_consumerd_state, CONSUMER_STARTED); } else { pthread_mutex_unlock(&ustconsumer32_data.pid_mutex); } @@ -3419,6 +3355,25 @@ static int process_client_msg(struct command_ctx *cmd_ctx) } skip_domain: + /* Validate consumer daemon state when start/stop trace command */ + if (cmd_ctx->lsm->cmd_type == LTTNG_START_TRACE || + cmd_ctx->lsm->cmd_type == LTTNG_STOP_TRACE) { + switch (cmd_ctx->lsm->domain.type) { + case LTTNG_DOMAIN_UST: + if (uatomic_read(&ust_consumerd_state) != CONSUMER_STARTED) { + ret = LTTCOMM_NO_USTCONSUMERD; + goto error; + } + break; + case LTTNG_DOMAIN_KERNEL: + if (uatomic_read(&kernel_consumerd_state) != CONSUMER_STARTED) { + ret = LTTCOMM_NO_KERNCONSUMERD; + goto error; + } + break; + } + } + /* * Check that the UID or GID match that of the tracing session. * The root user can interact with all sessions. @@ -3515,6 +3470,37 @@ skip_domain: ret = LTTCOMM_OK; break; } + case LTTNG_LIST_TRACEPOINT_FIELDS: + { + struct lttng_event_field *fields; + ssize_t nb_fields; + + nb_fields = cmd_list_tracepoint_fields(cmd_ctx->lsm->domain.type, &fields); + if (nb_fields < 0) { + ret = -nb_fields; + goto error; + } + + /* + * Setup lttng message with payload size set to the event list size in + * bytes and then copy list into the llm payload. + */ + ret = setup_lttng_msg(cmd_ctx, sizeof(struct lttng_event_field) * nb_fields); + if (ret < 0) { + free(fields); + goto setup_error; + } + + /* Copy event list into message payload */ + memcpy(cmd_ctx->llm->payload, fields, + sizeof(struct lttng_event_field) * nb_fields); + + free(fields); + + ret = LTTCOMM_OK; + break; + } + case LTTNG_START_TRACE: { ret = cmd_start_trace(cmd_ctx->session); @@ -3535,6 +3521,11 @@ skip_domain: { ret = cmd_destroy_session(cmd_ctx->session, cmd_ctx->lsm->session.name); + /* + * Set session to NULL so we do not unlock it after + * free. + */ + cmd_ctx->session = NULL; break; } case LTTNG_LIST_DOMAINS: @@ -3564,7 +3555,7 @@ skip_domain: } case LTTNG_LIST_CHANNELS: { - size_t nb_chan; + int nb_chan; struct lttng_channel *channels; nb_chan = cmd_list_channels(cmd_ctx->lsm->domain.type, @@ -3669,6 +3660,9 @@ setup_error: if (cmd_ctx->session) { session_unlock(cmd_ctx->session); } + if (need_tracing_session) { + session_unlock_list(); + } init_setup_error: return ret; } @@ -3798,7 +3792,7 @@ static void *thread_manage_clients(void *data) PERROR("close"); } sock = -1; - free(cmd_ctx); + clean_command_ctx(&cmd_ctx); continue; } @@ -4091,13 +4085,15 @@ static int set_permissions(char *rundir) int ret; gid_t gid; - gid = allowed_group(); - if (gid < 0) { + ret = allowed_group(); + if (ret < 0) { WARN("No tracing group detected"); ret = 0; goto end; } + gid = ret; + /* Set lttng run dir */ ret = chown(rundir, 0, gid); if (ret < 0) { @@ -4146,58 +4142,6 @@ end: return ret; } -/* - * Create the pipe used to wake up the kernel thread. - * Closed in cleanup(). - */ -static int create_kernel_poll_pipe(void) -{ - int ret, i; - - ret = pipe(kernel_poll_pipe); - if (ret < 0) { - PERROR("kernel poll pipe"); - goto error; - } - - for (i = 0; i < 2; i++) { - ret = fcntl(kernel_poll_pipe[i], F_SETFD, FD_CLOEXEC); - if (ret < 0) { - PERROR("fcntl kernel_poll_pipe"); - goto error; - } - } - -error: - return ret; -} - -/* - * Create the application command pipe to wake thread_manage_apps. - * Closed in cleanup(). - */ -static int create_apps_cmd_pipe(void) -{ - int ret, i; - - ret = pipe(apps_cmd_pipe); - if (ret < 0) { - PERROR("apps cmd pipe"); - goto error; - } - - for (i = 0; i < 2; i++) { - ret = fcntl(apps_cmd_pipe[i], F_SETFD, FD_CLOEXEC); - if (ret < 0) { - PERROR("fcntl apps_cmd_pipe"); - goto error; - } - } - -error: - return ret; -} - /* * Create the lttng run directory needed for all global sockets and pipe. */ @@ -4376,11 +4320,6 @@ int main(int argc, char **argv) rcu_register_thread(); - /* Create thread quit pipe */ - if ((ret = init_thread_quit_pipe()) < 0) { - goto error; - } - setup_consumerd_path(); /* Parse arguments */ @@ -4391,11 +4330,31 @@ int main(int argc, char **argv) /* Daemonize */ if (opt_daemon) { + int i; + + /* + * fork + * child: setsid, close FD 0, 1, 2, chdir / + * parent: exit (if fork is successful) + */ ret = daemon(0, 0); if (ret < 0) { PERROR("daemon"); goto error; } + /* + * We are in the child. Make sure all other file + * descriptors are closed, in case we are called with + * more opened file descriptors than the standard ones. + */ + for (i = 3; i < sysconf(_SC_OPEN_MAX); i++) { + (void) close(i); + } + } + + /* Create thread quit pipe */ + if ((ret = init_thread_quit_pipe()) < 0) { + goto error; } /* Check if daemon is UID = 0 */ @@ -4478,6 +4437,10 @@ int main(int argc, char **argv) } } + /* Set consumer initial state */ + kernel_consumerd_state = CONSUMER_STOPPED; + ust_consumerd_state = CONSUMER_STOPPED; + DBG("Client socket path %s", client_unix_sock_path); DBG("Application socket path %s", apps_unix_sock_path); DBG("LTTng run directory path: %s", rundir); @@ -4516,6 +4479,12 @@ int main(int argc, char **argv) goto error; } + /* + * Init UST app hash table. Alloc hash table before this point since + * cleanup() can get called after that point. + */ + ust_app_ht_alloc(); + /* After this point, we can safely call cleanup() with "goto exit" */ /* @@ -4538,6 +4507,8 @@ int main(int argc, char **argv) /* Set ulimit for open files */ set_ulimit(); } + /* init lttng_fd tracking must be done after set_ulimit. */ + lttng_fd_init(); ret = set_consumer_sockets(&ustconsumer64_data, rundir); if (ret < 0) { @@ -4569,21 +4540,18 @@ int main(int argc, char **argv) } /* Setup the kernel pipe for waking up the kernel thread */ - if ((ret = create_kernel_poll_pipe()) < 0) { + if ((ret = utils_create_pipe_cloexec(kernel_poll_pipe)) < 0) { goto exit; } /* Setup the thread apps communication pipe. */ - if ((ret = create_apps_cmd_pipe()) < 0) { + if ((ret = utils_create_pipe_cloexec(apps_cmd_pipe)) < 0) { goto exit; } /* Init UST command queue. */ cds_wfq_init(&ust_cmd_queue.queue); - /* Init UST app hash table */ - ust_app_ht_alloc(); - /* * Get session list pointer. This pointer MUST NOT be free(). This list is * statically declared in session.c