X-Git-Url: https://git.lttng.org/?p=lttng-tools.git;a=blobdiff_plain;f=src%2Fbin%2Flttng-sessiond%2Fmain.c;h=4b6a39dd5ecea5ef176139e7da2c6058f5884094;hp=8e4afb00ba0827295d86f53ed728763eb5ebe463;hb=e8209f6b352b3aa279d8d452e396adef6f7159c7;hpb=dad47fc4c65ec0acbfedc676895833b465d25cd6 diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c index 8e4afb00b..4b6a39dd5 100644 --- a/src/bin/lttng-sessiond/main.c +++ b/src/bin/lttng-sessiond/main.c @@ -2,22 +2,21 @@ * Copyright (C) 2011 - David Goulet * Mathieu Desnoyers * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; only version 2 of the License. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2 only, + * as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307, USA. + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #define _GNU_SOURCE -#include #include #include #include @@ -34,61 +33,47 @@ #include #include #include -#include +#include #include #include #include #include +#include #include #include -#include +#include +#include +#include #include "lttng-sessiond.h" #include "channel.h" +#include "consumer.h" #include "context.h" #include "event.h" -#include "futex.h" #include "kernel.h" +#include "kernel-consumer.h" #include "modprobe.h" #include "shm.h" #include "ust-ctl.h" +#include "ust-consumer.h" #include "utils.h" +#include "fd-limit.h" +#include "filter.h" +#include "health.h" #define CONSUMERD_FILE "lttng-consumerd" -struct consumer_data { - enum lttng_consumer_type type; - - pthread_t thread; /* Worker thread interacting with the consumer */ - sem_t sem; - - /* Mutex to control consumerd pid assignation */ - pthread_mutex_t pid_mutex; - pid_t pid; - - int err_sock; - int cmd_sock; - - /* consumer error and command Unix socket path */ - char err_unix_sock_path[PATH_MAX]; - char cmd_unix_sock_path[PATH_MAX]; -}; - /* Const values */ const char default_home_dir[] = DEFAULT_HOME_DIR; const char default_tracing_group[] = DEFAULT_TRACING_GROUP; const char default_ust_sock_dir[] = DEFAULT_UST_SOCK_DIR; const char default_global_apps_pipe[] = DEFAULT_GLOBAL_APPS_PIPE; -/* Variables */ -int opt_verbose; /* Not static for lttngerr.h */ -int opt_verbose_consumer; /* Not static for lttngerr.h */ -int opt_quiet; /* Not static for lttngerr.h */ - const char *progname; const char *opt_tracing_group; static int opt_sig_parent; +static int opt_verbose_consumer; static int opt_daemon; static int opt_no_kernel; static int is_root; /* Set to 1 if the daemon is running as root */ @@ -100,18 +85,31 @@ static struct consumer_data kconsumer_data = { .type = LTTNG_CONSUMER_KERNEL, .err_unix_sock_path = DEFAULT_KCONSUMERD_ERR_SOCK_PATH, .cmd_unix_sock_path = DEFAULT_KCONSUMERD_CMD_SOCK_PATH, + .err_sock = -1, + .cmd_sock = -1, + .pid_mutex = PTHREAD_MUTEX_INITIALIZER, + .lock = PTHREAD_MUTEX_INITIALIZER, }; static struct consumer_data ustconsumer64_data = { .type = LTTNG_CONSUMER64_UST, .err_unix_sock_path = DEFAULT_USTCONSUMERD64_ERR_SOCK_PATH, .cmd_unix_sock_path = DEFAULT_USTCONSUMERD64_CMD_SOCK_PATH, + .err_sock = -1, + .cmd_sock = -1, + .pid_mutex = PTHREAD_MUTEX_INITIALIZER, + .lock = PTHREAD_MUTEX_INITIALIZER, }; static struct consumer_data ustconsumer32_data = { .type = LTTNG_CONSUMER32_UST, .err_unix_sock_path = DEFAULT_USTCONSUMERD32_ERR_SOCK_PATH, .cmd_unix_sock_path = DEFAULT_USTCONSUMERD32_CMD_SOCK_PATH, + .err_sock = -1, + .cmd_sock = -1, + .pid_mutex = PTHREAD_MUTEX_INITIALIZER, + .lock = PTHREAD_MUTEX_INITIALIZER, }; +/* Shared between threads */ static int dispatch_thread_exit; /* Global application Unix socket path */ @@ -120,24 +118,26 @@ static char apps_unix_sock_path[PATH_MAX]; static char client_unix_sock_path[PATH_MAX]; /* global wait shm path for UST */ static char wait_shm_path[PATH_MAX]; +/* Global health check unix path */ +static char health_unix_sock_path[PATH_MAX]; /* Sockets and FDs */ -static int client_sock; -static int apps_sock; -static int kernel_tracer_fd; -static int kernel_poll_pipe[2]; +static int client_sock = -1; +static int apps_sock = -1; +static int kernel_tracer_fd = -1; +static int kernel_poll_pipe[2] = { -1, -1 }; /* * Quit pipe for all threads. This permits a single cancellation point * for all threads when receiving an event on the pipe. */ -static int thread_quit_pipe[2]; +static int thread_quit_pipe[2] = { -1, -1 }; /* * This pipe is used to inform the thread managing application communication * that a command is queued and ready to be processed. */ -static int apps_cmd_pipe[2]; +static int apps_cmd_pipe[2] = { -1, -1 }; /* Pthread, Mutexes and Semaphores */ static pthread_t apps_thread; @@ -145,7 +145,7 @@ static pthread_t reg_apps_thread; static pthread_t client_thread; static pthread_t kernel_thread; static pthread_t dispatch_thread; - +static pthread_t health_thread; /* * UST registration command queue. This queue is tied with a futex and uses a N @@ -176,6 +176,56 @@ static const char *consumerd64_bin = CONFIG_CONSUMERD64_BIN; static const char *consumerd32_libdir = CONFIG_CONSUMERD32_LIBDIR; static const char *consumerd64_libdir = CONFIG_CONSUMERD64_LIBDIR; +/* + * Consumer daemon state which is changed when spawning it, killing it or in + * case of a fatal error. + */ +enum consumerd_state { + CONSUMER_STARTED = 1, + CONSUMER_STOPPED = 2, + CONSUMER_ERROR = 3, +}; + +/* + * This consumer daemon state is used to validate if a client command will be + * able to reach the consumer. If not, the client is informed. For instance, + * doing a "lttng start" when the consumer state is set to ERROR will return an + * error to the client. + * + * The following example shows a possible race condition of this scheme: + * + * consumer thread error happens + * client cmd arrives + * client cmd checks state -> still OK + * consumer thread exit, sets error + * client cmd try to talk to consumer + * ... + * + * However, since the consumer is a different daemon, we have no way of making + * sure the command will reach it safely even with this state flag. This is why + * we consider that up to the state validation during command processing, the + * command is safe. After that, we can not guarantee the correctness of the + * client request vis-a-vis the consumer. + */ +static enum consumerd_state ust_consumerd_state; +static enum consumerd_state kernel_consumerd_state; + +/* + * Used to keep a unique index for each relayd socket created where this value + * is associated with streams on the consumer so it can match the right relayd + * to send to. + * + * This value should be incremented atomically for safety purposes and future + * possible concurrent access. + */ +static unsigned int relayd_net_seq_idx; + +/* Used for the health monitoring of the session daemon. See health.h */ +struct health_state health_thread_cmd; +struct health_state health_thread_app_manage; +struct health_state health_thread_app_reg; +struct health_state health_thread_kernel; + static void setup_consumerd_path(void) { @@ -295,14 +345,22 @@ static gid_t allowed_group(void) */ static int init_thread_quit_pipe(void) { - int ret; + int ret, i; - ret = pipe2(thread_quit_pipe, O_CLOEXEC); + ret = pipe(thread_quit_pipe); if (ret < 0) { - perror("thread quit pipe"); + PERROR("thread quit pipe"); goto error; } + for (i = 0; i < 2; i++) { + ret = fcntl(thread_quit_pipe[i], F_SETFD, FD_CLOEXEC); + if (ret < 0) { + PERROR("fcntl"); + goto error; + } + } + error: return ret; } @@ -313,22 +371,56 @@ error: */ static void teardown_kernel_session(struct ltt_session *session) { + int ret; + struct lttng_ht_iter iter; + struct ltt_kernel_session *ksess; + struct consumer_socket *socket; + if (!session->kernel_session) { DBG3("No kernel session when tearing down session"); return; } + ksess = session->kernel_session; + DBG("Tearing down kernel session"); + /* + * Destroy relayd associated with the session consumer. This action is + * valid since in order to destroy a session we must acquire the session + * lock. This means that there CAN NOT be stream(s) being sent to a + * consumer since this action also requires the session lock at any time. + * + * At this point, we are sure that not streams data will be lost after this + * command is issued. + */ + if (ksess->consumer && ksess->consumer->type == CONSUMER_DST_NET) { + cds_lfht_for_each_entry(ksess->consumer->socks->ht, &iter.iter, socket, + node.node) { + ret = consumer_send_destroy_relayd(socket, ksess->consumer); + if (ret < 0) { + ERR("Unable to send destroy relayd command to consumer"); + /* Continue since we MUST delete everything at this point. */ + } + } + } + /* * If a custom kernel consumer was registered, close the socket before * tearing down the complete kernel session structure */ - if (session->kernel_session->consumer_fd != kconsumer_data.cmd_sock) { - lttcomm_close_unix_sock(session->kernel_session->consumer_fd); + cds_lfht_for_each_entry(ksess->consumer->socks->ht, &iter.iter, socket, + node.node) { + if (socket->fd != kconsumer_data.cmd_sock) { + rcu_read_lock(); + consumer_del_socket(socket, ksess->consumer); + lttcomm_close_unix_sock(socket->fd); + consumer_destroy_socket(socket); + rcu_read_unlock(); + } } - trace_kernel_destroy_session(session->kernel_session); + trace_kernel_destroy_session(ksess); } /* @@ -338,20 +430,44 @@ static void teardown_kernel_session(struct ltt_session *session) static void teardown_ust_session(struct ltt_session *session) { int ret; + struct lttng_ht_iter iter; + struct ltt_ust_session *usess; + struct consumer_socket *socket; if (!session->ust_session) { DBG3("No UST session when tearing down session"); return; } + usess = session->ust_session; DBG("Tearing down UST session(s)"); - ret = ust_app_destroy_trace_all(session->ust_session); + /* + * Destroy relayd associated with the session consumer. This action is + * valid since in order to destroy a session we must acquire the session + * lock. This means that there CAN NOT be stream(s) being sent to a + * consumer since this action also requires the session lock at any time. + * + * At this point, we are sure that not streams data will be lost after this + * command is issued. + */ + if (usess->consumer && usess->consumer->type == CONSUMER_DST_NET) { + cds_lfht_for_each_entry(usess->consumer->socks->ht, &iter.iter, socket, + node.node) { + ret = consumer_send_destroy_relayd(socket, usess->consumer); + if (ret < 0) { + ERR("Unable to send destroy relayd command to consumer"); + /* Continue since we MUST delete everything at this point. */ + } + } + } + + ret = ust_app_destroy_trace_all(usess); if (ret) { ERR("Error in ust_app_destroy_trace_all"); } - trace_ust_destroy_session(session->ust_session); + trace_ust_destroy_session(usess); } /* @@ -369,7 +485,7 @@ static void stop_threads(void) } /* Dispatch thread */ - dispatch_thread_exit = 1; + CMM_STORE_SHARED(dispatch_thread_exit, 1); futex_nto1_wake(&ust_cmd_queue.futex); } @@ -415,17 +531,20 @@ static void cleanup(void) DBG("Closing all UST sockets"); ust_app_clean_list(); - pthread_mutex_destroy(&kconsumer_data.pid_mutex); - if (is_root && !opt_no_kernel) { DBG2("Closing kernel fd"); - close(kernel_tracer_fd); + if (kernel_tracer_fd >= 0) { + ret = close(kernel_tracer_fd); + if (ret) { + PERROR("close"); + } + } DBG("Unloading kernel modules"); modprobe_remove_lttng_all(); } - - close(thread_quit_pipe[0]); - close(thread_quit_pipe[1]); + utils_close_pipe(kernel_poll_pipe); + utils_close_pipe(thread_quit_pipe); + utils_close_pipe(apps_cmd_pipe); /* */ DBG("%c[%d;%dm*** assert failed :-) *** ==> %c[%dm%c[%d;%dm" @@ -467,139 +586,6 @@ static void clean_command_ctx(struct command_ctx **cmd_ctx) } } -/* - * Send all stream fds of kernel channel to the consumer. - */ -static int send_kconsumer_channel_streams(struct consumer_data *consumer_data, - int sock, struct ltt_kernel_channel *channel, - uid_t uid, gid_t gid) -{ - int ret; - struct ltt_kernel_stream *stream; - struct lttcomm_consumer_msg lkm; - - DBG("Sending streams of channel %s to kernel consumer", - channel->channel->name); - - /* Send channel */ - lkm.cmd_type = LTTNG_CONSUMER_ADD_CHANNEL; - lkm.u.channel.channel_key = channel->fd; - lkm.u.channel.max_sb_size = channel->channel->attr.subbuf_size; - lkm.u.channel.mmap_len = 0; /* for kernel */ - DBG("Sending channel %d to consumer", lkm.u.channel.channel_key); - ret = lttcomm_send_unix_sock(sock, &lkm, sizeof(lkm)); - if (ret < 0) { - perror("send consumer channel"); - goto error; - } - - /* Send streams */ - cds_list_for_each_entry(stream, &channel->stream_list.head, list) { - if (!stream->fd) { - continue; - } - lkm.cmd_type = LTTNG_CONSUMER_ADD_STREAM; - lkm.u.stream.channel_key = channel->fd; - lkm.u.stream.stream_key = stream->fd; - lkm.u.stream.state = stream->state; - lkm.u.stream.output = channel->channel->attr.output; - lkm.u.stream.mmap_len = 0; /* for kernel */ - lkm.u.stream.uid = uid; - lkm.u.stream.gid = gid; - strncpy(lkm.u.stream.path_name, stream->pathname, PATH_MAX - 1); - lkm.u.stream.path_name[PATH_MAX - 1] = '\0'; - DBG("Sending stream %d to consumer", lkm.u.stream.stream_key); - ret = lttcomm_send_unix_sock(sock, &lkm, sizeof(lkm)); - if (ret < 0) { - perror("send consumer stream"); - goto error; - } - ret = lttcomm_send_fds_unix_sock(sock, &stream->fd, 1); - if (ret < 0) { - perror("send consumer stream ancillary data"); - goto error; - } - } - - DBG("consumer channel streams sent"); - - return 0; - -error: - return ret; -} - -/* - * Send all stream fds of the kernel session to the consumer. - */ -static int send_kconsumer_session_streams(struct consumer_data *consumer_data, - struct ltt_kernel_session *session) -{ - int ret; - struct ltt_kernel_channel *chan; - struct lttcomm_consumer_msg lkm; - int sock = session->consumer_fd; - - DBG("Sending metadata stream fd"); - - /* Extra protection. It's NOT supposed to be set to 0 at this point */ - if (session->consumer_fd == 0) { - session->consumer_fd = consumer_data->cmd_sock; - } - - if (session->metadata_stream_fd != 0) { - /* Send metadata channel fd */ - lkm.cmd_type = LTTNG_CONSUMER_ADD_CHANNEL; - lkm.u.channel.channel_key = session->metadata->fd; - lkm.u.channel.max_sb_size = session->metadata->conf->attr.subbuf_size; - lkm.u.channel.mmap_len = 0; /* for kernel */ - DBG("Sending metadata channel %d to consumer", lkm.u.stream.stream_key); - ret = lttcomm_send_unix_sock(sock, &lkm, sizeof(lkm)); - if (ret < 0) { - perror("send consumer channel"); - goto error; - } - - /* Send metadata stream fd */ - lkm.cmd_type = LTTNG_CONSUMER_ADD_STREAM; - lkm.u.stream.channel_key = session->metadata->fd; - lkm.u.stream.stream_key = session->metadata_stream_fd; - lkm.u.stream.state = LTTNG_CONSUMER_ACTIVE_STREAM; - lkm.u.stream.output = DEFAULT_KERNEL_CHANNEL_OUTPUT; - lkm.u.stream.mmap_len = 0; /* for kernel */ - lkm.u.stream.uid = session->uid; - lkm.u.stream.gid = session->gid; - strncpy(lkm.u.stream.path_name, session->metadata->pathname, PATH_MAX - 1); - lkm.u.stream.path_name[PATH_MAX - 1] = '\0'; - DBG("Sending metadata stream %d to consumer", lkm.u.stream.stream_key); - ret = lttcomm_send_unix_sock(sock, &lkm, sizeof(lkm)); - if (ret < 0) { - perror("send consumer stream"); - goto error; - } - ret = lttcomm_send_fds_unix_sock(sock, &session->metadata_stream_fd, 1); - if (ret < 0) { - perror("send consumer stream"); - goto error; - } - } - - cds_list_for_each_entry(chan, &session->channel_list.head, list) { - ret = send_kconsumer_channel_streams(consumer_data, sock, chan, - session->uid, session->gid); - if (ret < 0) { - goto error; - } - } - - DBG("consumer fds (metadata and channel streams) sent"); - - return 0; - -error: - return ret; -} - /* * Notify UST applications using the shm mmap futex. */ @@ -640,7 +626,7 @@ static int setup_lttng_msg(struct command_ctx *cmd_ctx, size_t size) cmd_ctx->llm = zmalloc(sizeof(struct lttcomm_lttng_msg) + buf_size); if (cmd_ctx->llm == NULL) { - perror("zmalloc"); + PERROR("zmalloc"); ret = -ENOMEM; goto error; } @@ -709,6 +695,7 @@ static int update_kernel_stream(struct consumer_data *consumer_data, int fd) { int ret = 0; struct ltt_session *session; + struct ltt_kernel_session *ksess; struct ltt_kernel_channel *channel; DBG("Updating kernel streams for channel fd %d", fd); @@ -720,14 +707,9 @@ static int update_kernel_stream(struct consumer_data *consumer_data, int fd) session_unlock(session); continue; } + ksess = session->kernel_session; - /* This is not suppose to be 0 but this is an extra security check */ - if (session->kernel_session->consumer_fd == 0) { - session->kernel_session->consumer_fd = consumer_data->cmd_sock; - } - - cds_list_for_each_entry(channel, - &session->kernel_session->channel_list.head, list) { + cds_list_for_each_entry(channel, &ksess->channel_list.head, list) { if (channel->fd == fd) { DBG("Channel found, updating kernel streams"); ret = kernel_open_channel_stream(channel); @@ -740,12 +722,23 @@ static int update_kernel_stream(struct consumer_data *consumer_data, int fd) * that tracing is started so it is safe to send our updated * stream fds. */ - if (session->kernel_session->consumer_fds_sent == 1) { - ret = send_kconsumer_channel_streams(consumer_data, - session->kernel_session->consumer_fd, channel, - session->uid, session->gid); - if (ret < 0) { - goto error; + if (ksess->consumer_fds_sent == 1 && ksess->consumer != NULL) { + struct lttng_ht_iter iter; + struct consumer_socket *socket; + + + cds_lfht_for_each_entry(ksess->consumer->socks->ht, + &iter.iter, socket, node.node) { + /* Code flow error */ + assert(socket->fd >= 0); + + pthread_mutex_lock(socket->lock); + ret = kernel_consumer_send_channel_stream(socket->fd, + channel, ksess); + pthread_mutex_unlock(socket->lock); + if (ret < 0) { + goto error; + } } } goto error; @@ -769,12 +762,18 @@ static void update_ust_app(int app_sock) { struct ltt_session *sess, *stmp; + session_lock_list(); + /* For all tracing session(s) */ cds_list_for_each_entry_safe(sess, stmp, &session_list_ptr->head, list) { + session_lock(sess); if (sess->ust_session) { ust_app_global_update(sess->ust_session, app_sock); } + session_unlock(sess); } + + session_unlock_list(); } /* @@ -785,16 +784,18 @@ static void update_ust_app(int app_sock) */ static void *thread_manage_kernel(void *data) { - int ret, i, pollfd, update_poll_flag = 1; + int ret, i, pollfd, update_poll_flag = 1, err = -1; uint32_t revents, nb_fd; char tmp; struct lttng_poll_event events; DBG("Thread manage kernel started"); + health_code_update(&health_thread_kernel); + ret = create_thread_poll_set(&events, 2); if (ret < 0) { - goto error; + goto error_poll_create; } ret = lttng_poll_add(&events, kernel_poll_pipe[0], LPOLLIN); @@ -803,6 +804,8 @@ static void *thread_manage_kernel(void *data) } while (1) { + health_code_update(&health_thread_kernel); + if (update_poll_flag == 1) { /* * Reset number of fd in the poll set. Always 2 since there is the thread @@ -825,8 +828,17 @@ static void *thread_manage_kernel(void *data) lttng_poll_reset(&events); /* Poll infinite value of time */ + restart: + health_poll_update(&health_thread_kernel); ret = lttng_poll_wait(&events, -1); + health_poll_update(&health_thread_kernel); if (ret < 0) { + /* + * Restart interrupted system call. + */ + if (errno == EINTR) { + goto restart; + } goto error; } else if (ret == 0) { /* Should not happen since timeout is infinite */ @@ -840,10 +852,13 @@ static void *thread_manage_kernel(void *data) revents = LTTNG_POLL_GETEV(&events, i); pollfd = LTTNG_POLL_GETFD(&events, i); + health_code_update(&health_thread_kernel); + /* Thread quit pipe has been closed. Killing thread. */ ret = check_thread_quit_pipe(pollfd, revents); if (ret) { - goto error; + err = 0; + goto exit; } /* Check for data on kernel pipe */ @@ -871,13 +886,16 @@ static void *thread_manage_kernel(void *data) } } +exit: error: - DBG("Kernel thread dying"); - close(kernel_poll_pipe[0]); - close(kernel_poll_pipe[1]); - lttng_poll_clean(&events); - +error_poll_create: + if (err) { + health_error(&health_thread_kernel); + ERR("Health error occurred in %s", __func__); + } + health_exit(&health_thread_kernel); + DBG("Kernel thread dying"); return NULL; } @@ -886,7 +904,7 @@ error: */ static void *thread_manage_consumer(void *data) { - int sock = 0, i, ret, pollfd; + int sock = -1, i, ret, pollfd, err = -1; uint32_t revents, nb_fd; enum lttcomm_return_code code; struct lttng_poll_event events; @@ -894,9 +912,11 @@ static void *thread_manage_consumer(void *data) DBG("[thread] Manage consumer started"); + health_code_update(&consumer_data->health); + ret = lttcomm_listen_unix_sock(consumer_data->err_sock); if (ret < 0) { - goto error; + goto error_listen; } /* @@ -905,7 +925,7 @@ static void *thread_manage_consumer(void *data) */ ret = create_thread_poll_set(&events, 2); if (ret < 0) { - goto error; + goto error_poll; } ret = lttng_poll_add(&events, consumer_data->err_sock, LPOLLIN | LPOLLRDHUP); @@ -915,9 +935,20 @@ static void *thread_manage_consumer(void *data) nb_fd = LTTNG_POLL_GETNB(&events); + health_code_update(&consumer_data->health); + /* Inifinite blocking call, waiting for transmission */ +restart: + health_poll_update(&consumer_data->health); ret = lttng_poll_wait(&events, -1); + health_poll_update(&consumer_data->health); if (ret < 0) { + /* + * Restart interrupted system call. + */ + if (errno == EINTR) { + goto restart; + } goto error; } @@ -926,10 +957,13 @@ static void *thread_manage_consumer(void *data) revents = LTTNG_POLL_GETEV(&events, i); pollfd = LTTNG_POLL_GETFD(&events, i); + health_code_update(&consumer_data->health); + /* Thread quit pipe has been closed. Killing thread. */ ret = check_thread_quit_pipe(pollfd, revents); if (ret) { - goto error; + err = 0; + goto exit; } /* Event on the registration socket */ @@ -946,6 +980,8 @@ static void *thread_manage_consumer(void *data) goto error; } + health_code_update(&consumer_data->health); + DBG2("Receiving code from consumer err_sock"); /* Getting status code from kconsumerd */ @@ -955,6 +991,8 @@ static void *thread_manage_consumer(void *data) goto error; } + health_code_update(&consumer_data->health); + if (code == CONSUMERD_COMMAND_SOCK_READY) { consumer_data->cmd_sock = lttcomm_connect_unix_sock(consumer_data->cmd_unix_sock_path); @@ -983,12 +1021,23 @@ static void *thread_manage_consumer(void *data) goto error; } + health_code_update(&consumer_data->health); + /* Update number of fd */ nb_fd = LTTNG_POLL_GETNB(&events); /* Inifinite blocking call, waiting for transmission */ +restart_poll: + health_poll_update(&consumer_data->health); ret = lttng_poll_wait(&events, -1); + health_poll_update(&consumer_data->health); if (ret < 0) { + /* + * Restart interrupted system call. + */ + if (errno == EINTR) { + goto restart_poll; + } goto error; } @@ -997,10 +1046,13 @@ static void *thread_manage_consumer(void *data) revents = LTTNG_POLL_GETEV(&events, i); pollfd = LTTNG_POLL_GETFD(&events, i); + health_code_update(&consumer_data->health); + /* Thread quit pipe has been closed. Killing thread. */ ret = check_thread_quit_pipe(pollfd, revents); if (ret) { - goto error; + err = 0; + goto exit; } /* Event on the kconsumerd socket */ @@ -1012,6 +1064,8 @@ static void *thread_manage_consumer(void *data) } } + health_code_update(&consumer_data->health); + /* Wait for any kconsumerd error */ ret = lttcomm_recv_unix_sock(sock, &code, sizeof(enum lttcomm_return_code)); @@ -1022,17 +1076,51 @@ static void *thread_manage_consumer(void *data) ERR("consumer return code : %s", lttcomm_get_readable_code(-code)); +exit: error: - DBG("consumer thread dying"); - close(consumer_data->err_sock); - close(consumer_data->cmd_sock); - close(sock); + /* Immediately set the consumerd state to stopped */ + if (consumer_data->type == LTTNG_CONSUMER_KERNEL) { + uatomic_set(&kernel_consumerd_state, CONSUMER_ERROR); + } else if (consumer_data->type == LTTNG_CONSUMER64_UST || + consumer_data->type == LTTNG_CONSUMER32_UST) { + uatomic_set(&ust_consumerd_state, CONSUMER_ERROR); + } else { + /* Code flow error... */ + assert(0); + } + + if (consumer_data->err_sock >= 0) { + ret = close(consumer_data->err_sock); + if (ret) { + PERROR("close"); + } + } + if (consumer_data->cmd_sock >= 0) { + ret = close(consumer_data->cmd_sock); + if (ret) { + PERROR("close"); + } + } + if (sock >= 0) { + ret = close(sock); + if (ret) { + PERROR("close"); + } + } unlink(consumer_data->err_unix_sock_path); unlink(consumer_data->cmd_unix_sock_path); consumer_data->pid = 0; lttng_poll_clean(&events); +error_poll: +error_listen: + if (err) { + health_error(&consumer_data->health); + ERR("Health error occurred in %s", __func__); + } + health_exit(&consumer_data->health); + DBG("consumer thread cleanup completed"); return NULL; } @@ -1042,7 +1130,7 @@ error: */ static void *thread_manage_apps(void *data) { - int i, ret, pollfd; + int i, ret, pollfd, err = -1; uint32_t revents, nb_fd; struct ust_command ust_cmd; struct lttng_poll_event events; @@ -1052,9 +1140,11 @@ static void *thread_manage_apps(void *data) rcu_register_thread(); rcu_thread_online(); + health_code_update(&health_thread_app_manage); + ret = create_thread_poll_set(&events, 2); if (ret < 0) { - goto error; + goto error_poll_create; } ret = lttng_poll_add(&events, apps_cmd_pipe[0], LPOLLIN | LPOLLRDHUP); @@ -1062,6 +1152,8 @@ static void *thread_manage_apps(void *data) goto error; } + health_code_update(&health_thread_app_manage); + while (1) { /* Zeroed the events structure */ lttng_poll_reset(&events); @@ -1071,8 +1163,17 @@ static void *thread_manage_apps(void *data) DBG("Apps thread polling on %d fds", nb_fd); /* Inifinite blocking call, waiting for transmission */ + restart: + health_poll_update(&health_thread_app_manage); ret = lttng_poll_wait(&events, -1); + health_poll_update(&health_thread_app_manage); if (ret < 0) { + /* + * Restart interrupted system call. + */ + if (errno == EINTR) { + goto restart; + } goto error; } @@ -1081,10 +1182,13 @@ static void *thread_manage_apps(void *data) revents = LTTNG_POLL_GETEV(&events, i); pollfd = LTTNG_POLL_GETFD(&events, i); + health_code_update(&health_thread_app_manage); + /* Thread quit pipe has been closed. Killing thread. */ ret = check_thread_quit_pipe(pollfd, revents); if (ret) { - goto error; + err = 0; + goto exit; } /* Inspect the apps cmd pipe */ @@ -1096,10 +1200,12 @@ static void *thread_manage_apps(void *data) /* Empty pipe */ ret = read(apps_cmd_pipe[0], &ust_cmd, sizeof(ust_cmd)); if (ret < 0 || ret < sizeof(ust_cmd)) { - perror("read apps cmd pipe"); + PERROR("read apps cmd pipe"); goto error; } + health_code_update(&health_thread_app_manage); + /* Register applicaton to the session daemon */ ret = ust_app_register(&ust_cmd.reg_msg, ust_cmd.sock); @@ -1109,6 +1215,8 @@ static void *thread_manage_apps(void *data) break; } + health_code_update(&health_thread_app_manage); + /* * Validate UST version compatibility. */ @@ -1121,6 +1229,8 @@ static void *thread_manage_apps(void *data) update_ust_app(ust_cmd.sock); } + health_code_update(&health_thread_app_manage); + ret = ust_app_register_done(ust_cmd.sock); if (ret < 0) { /* @@ -1132,8 +1242,10 @@ static void *thread_manage_apps(void *data) /* * We just need here to monitor the close of the UST * socket and poll set monitor those by default. + * Listen on POLLIN (even if we never expect any + * data) to ensure that hangup wakes us. */ - ret = lttng_poll_add(&events, ust_cmd.sock, 0); + ret = lttng_poll_add(&events, ust_cmd.sock, LPOLLIN); if (ret < 0) { goto error; } @@ -1142,6 +1254,8 @@ static void *thread_manage_apps(void *data) ust_cmd.sock); } + health_code_update(&health_thread_app_manage); + break; } } else { @@ -1161,16 +1275,21 @@ static void *thread_manage_apps(void *data) break; } } + + health_code_update(&health_thread_app_manage); } } +exit: error: - DBG("Application communication apps dying"); - close(apps_cmd_pipe[0]); - close(apps_cmd_pipe[1]); - lttng_poll_clean(&events); - +error_poll_create: + if (err) { + health_error(&health_thread_app_manage); + ERR("Health error occurred in %s", __func__); + } + health_exit(&health_thread_app_manage); + DBG("Application communication apps thread cleanup complete"); rcu_thread_offline(); rcu_unregister_thread(); return NULL; @@ -1188,7 +1307,7 @@ static void *thread_dispatch_ust_registration(void *data) DBG("[thread] Dispatch UST command started"); - while (!dispatch_thread_exit) { + while (!CMM_LOAD_SHARED(dispatch_thread_exit)) { /* Atomically prepare the queue futex */ futex_nto1_prepare(&ust_cmd_queue.futex); @@ -1217,7 +1336,7 @@ static void *thread_dispatch_ust_registration(void *data) ret = write(apps_cmd_pipe[1], ust_cmd, sizeof(struct ust_command)); if (ret < 0) { - perror("write apps cmd pipe"); + PERROR("write apps cmd pipe"); if (errno == EBADF) { /* * We can't inform the application thread to process @@ -1245,7 +1364,7 @@ error: */ static void *thread_registration_apps(void *data) { - int sock = 0, i, ret, pollfd; + int sock = -1, i, ret, pollfd, err = -1; uint32_t revents, nb_fd; struct lttng_poll_event events; /* @@ -1258,7 +1377,7 @@ static void *thread_registration_apps(void *data) ret = lttcomm_listen_unix_sock(apps_sock); if (ret < 0) { - goto error; + goto error_listen; } /* @@ -1267,13 +1386,13 @@ static void *thread_registration_apps(void *data) */ ret = create_thread_poll_set(&events, 2); if (ret < 0) { - goto error; + goto error_create_poll; } /* Add the application registration socket */ ret = lttng_poll_add(&events, apps_sock, LPOLLIN | LPOLLRDHUP); if (ret < 0) { - goto error; + goto error_poll_add; } /* Notify all applications to register */ @@ -1290,12 +1409,23 @@ static void *thread_registration_apps(void *data) nb_fd = LTTNG_POLL_GETNB(&events); /* Inifinite blocking call, waiting for transmission */ + restart: + health_poll_update(&health_thread_app_reg); ret = lttng_poll_wait(&events, -1); + health_poll_update(&health_thread_app_reg); if (ret < 0) { + /* + * Restart interrupted system call. + */ + if (errno == EINTR) { + goto restart; + } goto error; } for (i = 0; i < nb_fd; i++) { + health_code_update(&health_thread_app_reg); + /* Fetch once the poll data */ revents = LTTNG_POLL_GETEV(&events, i); pollfd = LTTNG_POLL_GETFD(&events, i); @@ -1303,7 +1433,8 @@ static void *thread_registration_apps(void *data) /* Thread quit pipe has been closed. Killing thread. */ ret = check_thread_quit_pipe(pollfd, revents); if (ret) { - goto error; + err = 0; + goto exit; } /* Event on the registration socket */ @@ -1320,7 +1451,7 @@ static void *thread_registration_apps(void *data) /* Create UST registration command for enqueuing */ ust_cmd = zmalloc(sizeof(struct ust_command)); if (ust_cmd == NULL) { - perror("ust command zmalloc"); + PERROR("ust command zmalloc"); goto error; } @@ -1328,20 +1459,39 @@ static void *thread_registration_apps(void *data) * Using message-based transmissions to ensure we don't * have to deal with partially received messages. */ + ret = lttng_fd_get(LTTNG_FD_APPS, 1); + if (ret < 0) { + ERR("Exhausted file descriptors allowed for applications."); + free(ust_cmd); + ret = close(sock); + if (ret) { + PERROR("close"); + } + sock = -1; + continue; + } + health_code_update(&health_thread_app_reg); ret = lttcomm_recv_unix_sock(sock, &ust_cmd->reg_msg, sizeof(struct ust_register_msg)); if (ret < 0 || ret < sizeof(struct ust_register_msg)) { if (ret < 0) { - perror("lttcomm_recv_unix_sock register apps"); + PERROR("lttcomm_recv_unix_sock register apps"); } else { ERR("Wrong size received on apps register"); } free(ust_cmd); - close(sock); + ret = close(sock); + if (ret) { + PERROR("close"); + } + lttng_fd_put(LTTNG_FD_APPS, 1); + sock = -1; continue; } + health_code_update(&health_thread_app_reg); ust_cmd->sock = sock; + sock = -1; DBG("UST registration received with pid:%d ppid:%d uid:%d" " gid:%d sock:%d name:%s (version %d.%d)", @@ -1366,17 +1516,37 @@ static void *thread_registration_apps(void *data) } } +exit: error: - DBG("UST Registration thread dying"); + if (err) { + health_error(&health_thread_app_reg); + ERR("Health error occurred in %s", __func__); + } + health_exit(&health_thread_app_reg); /* Notify that the registration thread is gone */ notify_ust_apps(0); - close(apps_sock); - close(sock); + if (apps_sock >= 0) { + ret = close(apps_sock); + if (ret) { + PERROR("close"); + } + } + if (sock >= 0) { + ret = close(sock); + if (ret) { + PERROR("close"); + } + lttng_fd_put(LTTNG_FD_APPS, 1); + } unlink(apps_unix_sock_path); +error_poll_add: lttng_poll_clean(&events); +error_listen: +error_create_poll: + DBG("UST Registration thread cleanup complete"); return NULL; } @@ -1459,7 +1629,8 @@ static int join_consumer_thread(struct consumer_data *consumer_data) void *status; int ret; - if (consumer_data->pid != 0) { + /* Consumer pid must be a real one. */ + if (consumer_data->pid > 0) { ret = kill(consumer_data->pid, SIGTERM); if (ret) { ERR("Error killing consumer daemon"); @@ -1616,17 +1787,17 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data) break; } default: - perror("unknown consumer type"); + PERROR("unknown consumer type"); exit(EXIT_FAILURE); } if (errno != 0) { - perror("kernel start consumer exec"); + PERROR("kernel start consumer exec"); } exit(EXIT_FAILURE); } else if (pid > 0) { ret = pid; } else { - perror("start consumer fork"); + PERROR("start consumer fork"); ret = -errno; } error: @@ -1672,6 +1843,23 @@ error: return ret; } +/* + * Compute health status of each consumer. If one of them is zero (bad + * state), we return 0. + */ +static int check_consumer_health(void) +{ + int ret; + + ret = health_check_state(&kconsumer_data.health) && + health_check_state(&ustconsumer32_data.health) && + health_check_state(&ustconsumer64_data.health); + + DBG3("Health consumer check %d", ret); + + return ret; +} + /* * Check version of the lttng-modules. */ @@ -1717,20 +1905,30 @@ static int init_kernel_tracer(void) error_version: modprobe_remove_lttng_control(); - close(kernel_tracer_fd); - kernel_tracer_fd = 0; + ret = close(kernel_tracer_fd); + if (ret) { + PERROR("close"); + } + kernel_tracer_fd = -1; return LTTCOMM_KERN_VERSION; error_modules: - close(kernel_tracer_fd); + ret = close(kernel_tracer_fd); + if (ret) { + PERROR("close"); + } error_open: modprobe_remove_lttng_control(); error: WARN("No kernel tracer available"); - kernel_tracer_fd = 0; - return LTTCOMM_KERN_NA; + kernel_tracer_fd = -1; + if (!is_root) { + return LTTCOMM_NEED_ROOT_SESSIOND; + } else { + return LTTCOMM_KERN_NA; + } } /* @@ -1739,24 +1937,25 @@ error: static int init_kernel_tracing(struct ltt_kernel_session *session) { int ret = 0; + struct lttng_ht_iter iter; + struct consumer_socket *socket; - if (session->consumer_fds_sent == 0) { - /* - * Assign default kernel consumer socket if no consumer assigned to the - * kernel session. At this point, it's NOT suppose to be 0 but this is - * an extra security check. - */ - if (session->consumer_fd == 0) { - session->consumer_fd = kconsumer_data.cmd_sock; - } + assert(session); - ret = send_kconsumer_session_streams(&kconsumer_data, session); - if (ret < 0) { - ret = LTTCOMM_KERN_CONSUMER_FAIL; - goto error; - } + if (session->consumer_fds_sent == 0 && session->consumer != NULL) { + cds_lfht_for_each_entry(session->consumer->socks->ht, &iter.iter, + socket, node.node) { + /* Code flow error */ + assert(socket->fd >= 0); - session->consumer_fds_sent = 1; + pthread_mutex_lock(socket->lock); + ret = kernel_consumer_send_session(socket->fd, session); + pthread_mutex_unlock(socket->lock); + if (ret < 0) { + ret = LTTCOMM_KERN_CONSUMER_FAIL; + goto error; + } + } } error: @@ -1764,57 +1963,320 @@ error: } /* - * Create an UST session and add it to the session ust list. + * Create a socket to the relayd using the URI. + * + * On success, the relayd_sock pointer is set to the created socket. + * Else, it is untouched and an lttcomm error code is returned. */ -static int create_ust_session(struct ltt_session *session, - struct lttng_domain *domain) +static int create_connect_relayd(struct consumer_output *output, + const char *session_name, struct lttng_uri *uri, + struct lttcomm_sock **relayd_sock) { - struct ltt_ust_session *lus = NULL; int ret; + struct lttcomm_sock *sock; - switch (domain->type) { - case LTTNG_DOMAIN_UST: - break; - default: - ret = LTTCOMM_UNKNOWN_DOMAIN; + /* Create socket object from URI */ + sock = lttcomm_alloc_sock_from_uri(uri); + if (sock == NULL) { + ret = LTTCOMM_FATAL; goto error; } - DBG("Creating UST session"); - - lus = trace_ust_create_session(session->path, session->id, domain); - if (lus == NULL) { - ret = LTTCOMM_UST_SESS_FAIL; + ret = lttcomm_create_sock(sock); + if (ret < 0) { + ret = LTTCOMM_FATAL; goto error; } - ret = run_as_mkdir_recursive(lus->pathname, S_IRWXU | S_IRWXG, - session->uid, session->gid); + /* Connect to relayd so we can proceed with a session creation. */ + ret = relayd_connect(sock); if (ret < 0) { - if (ret != -EEXIST) { - ERR("Trace directory creation error"); - ret = LTTCOMM_UST_SESS_FAIL; - goto error; + ERR("Unable to reach lttng-relayd"); + ret = LTTCOMM_RELAYD_SESSION_FAIL; + goto free_sock; + } + + /* Create socket for control stream. */ + if (uri->stype == LTTNG_STREAM_CONTROL) { + DBG3("Creating relayd stream socket from URI"); + + /* Check relayd version */ + ret = relayd_version_check(sock, LTTNG_UST_COMM_MAJOR, 0); + if (ret < 0) { + ret = LTTCOMM_RELAYD_VERSION_FAIL; + goto close_sock; + } + } else if (uri->stype == LTTNG_STREAM_DATA) { + DBG3("Creating relayd data socket from URI"); + } else { + /* Command is not valid */ + ERR("Relayd invalid stream type: %d", uri->stype); + ret = LTTCOMM_INVALID; + goto close_sock; + } + + *relayd_sock = sock; + + return LTTCOMM_OK; + +close_sock: + if (sock) { + (void) relayd_close(sock); + } +free_sock: + if (sock) { + lttcomm_destroy_sock(sock); + } +error: + return ret; +} + +/* + * Connect to the relayd using URI and send the socket to the right consumer. + */ +static int send_socket_relayd_consumer(int domain, struct ltt_session *session, + struct lttng_uri *relayd_uri, struct consumer_output *consumer, + int consumer_fd) +{ + int ret; + struct lttcomm_sock *sock = NULL; + + /* Set the network sequence index if not set. */ + if (consumer->net_seq_index == -1) { + /* + * Increment net_seq_idx because we are about to transfer the + * new relayd socket to the consumer. + */ + uatomic_inc(&relayd_net_seq_idx); + /* Assign unique key so the consumer can match streams */ + consumer->net_seq_index = uatomic_read(&relayd_net_seq_idx); + } + + /* Connect to relayd and make version check if uri is the control. */ + ret = create_connect_relayd(consumer, session->name, relayd_uri, &sock); + if (ret != LTTCOMM_OK) { + goto close_sock; + } + + /* If the control socket is connected, network session is ready */ + if (relayd_uri->stype == LTTNG_STREAM_CONTROL) { + session->net_handle = 1; + } + + /* Send relayd socket to consumer. */ + ret = consumer_send_relayd_socket(consumer_fd, sock, + consumer, relayd_uri->stype); + if (ret < 0) { + ret = LTTCOMM_ENABLE_CONSUMER_FAIL; + goto close_sock; + } + + ret = LTTCOMM_OK; + + /* + * Close socket which was dup on the consumer side. The session daemon does + * NOT keep track of the relayd socket(s) once transfer to the consumer. + */ + +close_sock: + if (sock) { + (void) relayd_close(sock); + lttcomm_destroy_sock(sock); + } + + return ret; +} + +/* + * Send both relayd sockets to a specific consumer and domain. This is a + * helper function to facilitate sending the information to the consumer for a + * session. + */ +static int send_sockets_relayd_consumer(int domain, + struct ltt_session *session, struct consumer_output *consumer, int fd) +{ + int ret; + + /* Sending control relayd socket. */ + ret = send_socket_relayd_consumer(domain, session, + &consumer->dst.net.control, consumer, fd); + if (ret != LTTCOMM_OK) { + goto error; + } + + /* Sending data relayd socket. */ + ret = send_socket_relayd_consumer(domain, session, + &consumer->dst.net.data, consumer, fd); + if (ret != LTTCOMM_OK) { + goto error; + } + +error: + return ret; +} + +/* + * Setup relayd connections for a tracing session. First creates the socket to + * the relayd and send them to the right domain consumer. Consumer type MUST be + * network. + */ +static int setup_relayd(struct ltt_session *session) +{ + int ret = LTTCOMM_OK; + struct ltt_ust_session *usess; + struct ltt_kernel_session *ksess; + struct consumer_socket *socket; + struct lttng_ht_iter iter; + + assert(session); + + usess = session->ust_session; + ksess = session->kernel_session; + + DBG2("Setting relayd for session %s", session->name); + + if (usess && usess->consumer->type == CONSUMER_DST_NET && + usess->consumer->enabled) { + /* For each consumer socket, send relayd sockets */ + cds_lfht_for_each_entry(usess->consumer->socks->ht, &iter.iter, + socket, node.node) { + /* Code flow error */ + assert(socket->fd >= 0); + + pthread_mutex_lock(socket->lock); + send_sockets_relayd_consumer(LTTNG_DOMAIN_UST, session, + usess->consumer, socket->fd); + pthread_mutex_unlock(socket->lock); + if (ret != LTTCOMM_OK) { + goto error; + } + } + } else if (ksess && ksess->consumer->type == CONSUMER_DST_NET && + ksess->consumer->enabled) { + cds_lfht_for_each_entry(ksess->consumer->socks->ht, &iter.iter, + socket, node.node) { + /* Code flow error */ + assert(socket->fd >= 0); + + pthread_mutex_lock(socket->lock); + send_sockets_relayd_consumer(LTTNG_DOMAIN_KERNEL, session, + ksess->consumer, socket->fd); + pthread_mutex_unlock(socket->lock); + if (ret != LTTCOMM_OK) { + goto error; + } } } - /* The domain type dictate different actions on session creation */ +error: + return ret; +} + +/* + * Copy consumer output from the tracing session to the domain session. The + * function also applies the right modification on a per domain basis for the + * trace files destination directory. + */ +static int copy_session_consumer(int domain, struct ltt_session *session) +{ + int ret; + const char *dir_name; + struct consumer_output *consumer; + + switch (domain) { + case LTTNG_DOMAIN_KERNEL: + DBG3("Copying tracing session consumer output in kernel session"); + session->kernel_session->consumer = + consumer_copy_output(session->consumer); + /* Ease our life a bit for the next part */ + consumer = session->kernel_session->consumer; + dir_name = DEFAULT_KERNEL_TRACE_DIR; + break; + case LTTNG_DOMAIN_UST: + DBG3("Copying tracing session consumer output in UST session"); + session->ust_session->consumer = + consumer_copy_output(session->consumer); + /* Ease our life a bit for the next part */ + consumer = session->ust_session->consumer; + dir_name = DEFAULT_UST_TRACE_DIR; + break; + default: + ret = LTTCOMM_UNKNOWN_DOMAIN; + goto error; + } + + /* Append correct directory to subdir */ + strncat(consumer->subdir, dir_name, sizeof(consumer->subdir)); + DBG3("Copy session consumer subdir %s", consumer->subdir); + + /* Add default trace directory name */ + if (consumer->type == CONSUMER_DST_LOCAL) { + strncat(consumer->dst.trace_path, dir_name, + sizeof(consumer->dst.trace_path)); + } + + ret = LTTCOMM_OK; + +error: + return ret; +} + +/* + * Create an UST session and add it to the session ust list. + */ +static int create_ust_session(struct ltt_session *session, + struct lttng_domain *domain) +{ + int ret; + struct ltt_ust_session *lus = NULL; + + assert(session); + assert(session->consumer); + switch (domain->type) { case LTTNG_DOMAIN_UST: - /* No ustctl for the global UST domain */ break; default: ERR("Unknown UST domain on create session %d", domain->type); + ret = LTTCOMM_UNKNOWN_DOMAIN; goto error; } + + DBG("Creating UST session"); + + lus = trace_ust_create_session(session->path, session->id, domain); + if (lus == NULL) { + ret = LTTCOMM_UST_SESS_FAIL; + goto error; + } + + if (session->consumer->type == CONSUMER_DST_LOCAL) { + ret = run_as_mkdir_recursive(lus->pathname, S_IRWXU | S_IRWXG, + session->uid, session->gid); + if (ret < 0) { + if (ret != -EEXIST) { + ERR("Trace directory creation error"); + ret = LTTCOMM_UST_SESS_FAIL; + goto error; + } + } + } + lus->uid = session->uid; lus->gid = session->gid; session->ust_session = lus; + /* Copy session output to the newly created UST session */ + ret = copy_session_consumer(domain->type, session); + if (ret != LTTCOMM_OK) { + goto error; + } + return LTTCOMM_OK; error: free(lus); + session->ust_session = NULL; return ret; } @@ -1833,23 +2295,33 @@ static int create_kernel_session(struct ltt_session *session) goto error; } - /* Set kernel consumer socket fd */ - if (kconsumer_data.cmd_sock) { - session->kernel_session->consumer_fd = kconsumer_data.cmd_sock; + /* Copy session output to the newly created Kernel session */ + ret = copy_session_consumer(LTTNG_DOMAIN_KERNEL, session); + if (ret != LTTCOMM_OK) { + goto error; } - ret = run_as_mkdir_recursive(session->kernel_session->trace_path, - S_IRWXU | S_IRWXG, session->uid, session->gid); - if (ret < 0) { - if (ret != -EEXIST) { - ERR("Trace directory creation error"); - goto error; + /* Create directory(ies) on local filesystem. */ + if (session->consumer->type == CONSUMER_DST_LOCAL) { + ret = run_as_mkdir_recursive( + session->kernel_session->consumer->dst.trace_path, + S_IRWXU | S_IRWXG, session->uid, session->gid); + if (ret < 0) { + if (ret != -EEXIST) { + ERR("Trace directory creation error"); + goto error; + } } } + session->kernel_session->uid = session->uid; session->kernel_session->gid = session->gid; + return LTTCOMM_OK; + error: + trace_kernel_destroy_session(session->kernel_session); + session->kernel_session = NULL; return ret; } @@ -1866,6 +2338,9 @@ static int session_access_ok(struct ltt_session *session, uid_t uid, gid_t gid) } } +/* + * Count number of session permitted by uid/gid. + */ static unsigned int lttng_sessions_count(uid_t uid, gid_t gid) { unsigned int i = 0; @@ -2032,16 +2507,21 @@ static int list_lttng_ust_global_events(char *channel_name, tmp[i].type = LTTNG_EVENT_FUNCTION; break; } - strncpy(tmp[i].loglevel, uevent->attr.loglevel, LTTNG_SYMBOL_NAME_LEN); - tmp[i].loglevel[LTTNG_SYMBOL_NAME_LEN - 1] = '\0'; + tmp[i].loglevel = uevent->attr.loglevel; switch (uevent->attr.loglevel_type) { - case LTTNG_UST_LOGLEVEL: - tmp[i].type = LTTNG_EVENT_LOGLEVEL; + case LTTNG_UST_LOGLEVEL_ALL: + tmp[i].loglevel_type = LTTNG_EVENT_LOGLEVEL_ALL; + break; + case LTTNG_UST_LOGLEVEL_RANGE: + tmp[i].loglevel_type = LTTNG_EVENT_LOGLEVEL_RANGE; break; - case LTTNG_UST_LOGLEVEL_ONLY: - tmp[i].type = LTTNG_EVENT_LOGLEVEL_ONLY; + case LTTNG_UST_LOGLEVEL_SINGLE: + tmp[i].loglevel_type = LTTNG_EVENT_LOGLEVEL_SINGLE; break; } + if (uevent->filter) { + tmp[i].filter = 1; + } i++; } @@ -2424,14 +2904,48 @@ error: return ret; } +/* + * Command LTTNG_SET_FILTER processed by the client thread. + */ +static int cmd_set_filter(struct ltt_session *session, int domain, + char *channel_name, char *event_name, + struct lttng_filter_bytecode *bytecode) +{ + int ret; + + switch (domain) { + case LTTNG_DOMAIN_KERNEL: + ret = LTTCOMM_FATAL; + break; + case LTTNG_DOMAIN_UST: + { + struct ltt_ust_session *usess = session->ust_session; + + ret = filter_ust_set(usess, domain, bytecode, event_name, channel_name); + if (ret != LTTCOMM_OK) { + goto error; + } + break; + } +#if 0 + case LTTNG_DOMAIN_UST_EXEC_NAME: + case LTTNG_DOMAIN_UST_PID: + case LTTNG_DOMAIN_UST_PID_FOLLOW_CHILDREN: +#endif + default: + ret = LTTCOMM_UND; + goto error; + } + + ret = LTTCOMM_OK; + +error: + return ret; + +} + /* * Command LTTNG_ENABLE_EVENT processed by the client thread. - * - * TODO: currently, both events and loglevels are kept within the same - * namespace for UST global registry/app registery, so if an event - * happen to have the same name as the loglevel (very unlikely though), - * and an attempt is made to enable/disable both in the same session, - * the first to be created will be the only one allowed to exist. */ static int cmd_enable_event(struct ltt_session *session, int domain, char *channel_name, struct lttng_event *event) @@ -2713,6 +3227,36 @@ error: return -ret; } +/* + * Command LTTNG_LIST_TRACEPOINT_FIELDS processed by the client thread. + */ +static ssize_t cmd_list_tracepoint_fields(int domain, + struct lttng_event_field **fields) +{ + int ret; + ssize_t nb_fields = 0; + + switch (domain) { + case LTTNG_DOMAIN_UST: + nb_fields = ust_app_list_event_fields(fields); + if (nb_fields < 0) { + ret = LTTCOMM_UST_LIST_FAIL; + goto error; + } + break; + case LTTNG_DOMAIN_KERNEL: + default: /* fall-through */ + ret = LTTCOMM_UND; + goto error; + } + + return nb_fields; + +error: + /* Return negative value to differentiate return code */ + return -ret; +} + /* * Command LTTNG_START_TRACE processed by the client thread. */ @@ -2721,25 +3265,32 @@ static int cmd_start_trace(struct ltt_session *session) int ret; struct ltt_kernel_session *ksession; struct ltt_ust_session *usess; + struct ltt_kernel_channel *kchan; - /* Short cut */ + /* Ease our life a bit ;) */ ksession = session->kernel_session; usess = session->ust_session; if (session->enabled) { - ret = LTTCOMM_UST_START_FAIL; + /* Already started. */ + ret = LTTCOMM_TRACE_ALREADY_STARTED; goto error; } session->enabled = 1; + ret = setup_relayd(session); + if (ret != LTTCOMM_OK) { + ERR("Error setting up relayd for session %s", session->name); + goto error; + } + /* Kernel tracing */ if (ksession != NULL) { - struct ltt_kernel_channel *kchan; - /* Open kernel metadata */ if (ksession->metadata == NULL) { - ret = kernel_open_metadata(ksession, ksession->trace_path); + ret = kernel_open_metadata(ksession, + ksession->consumer->dst.trace_path); if (ret < 0) { ret = LTTCOMM_KERN_META_FAIL; goto error; @@ -2747,7 +3298,7 @@ static int cmd_start_trace(struct ltt_session *session) } /* Open kernel metadata stream */ - if (ksession->metadata_stream_fd == 0) { + if (ksession->metadata_stream_fd < 0) { ret = kernel_open_metadata_stream(ksession); if (ret < 0) { ERR("Kernel create metadata stream failed"); @@ -2819,7 +3370,7 @@ static int cmd_stop_trace(struct ltt_session *session) usess = session->ust_session; if (!session->enabled) { - ret = LTTCOMM_UST_STOP_FAIL; + ret = LTTCOMM_TRACE_ALREADY_STOPPED; goto error; } @@ -2829,12 +3380,15 @@ static int cmd_stop_trace(struct ltt_session *session) if (ksession != NULL) { DBG("Stop kernel tracing"); - /* Flush all buffers before stopping */ - ret = kernel_metadata_flush_buffer(ksession->metadata_stream_fd); - if (ret < 0) { - ERR("Kernel metadata flush failed"); + /* Flush metadata if exist */ + if (ksession->metadata_stream_fd >= 0) { + ret = kernel_metadata_flush_buffer(ksession->metadata_stream_fd); + if (ret < 0) { + ERR("Kernel metadata flush failed"); + } } + /* Flush all buffers before stopping */ cds_list_for_each_entry(kchan, &ksession->channel_list.head, list) { ret = kernel_flush_buffer(kchan); if (ret < 0) { @@ -2868,32 +3422,125 @@ error: } /* - * Command LTTNG_CREATE_SESSION processed by the client thread. + * Command LTTNG_CREATE_SESSION_URI processed by the client thread. */ -static int cmd_create_session(char *name, char *path, struct ucred *creds) +static int cmd_create_session_uri(char *name, struct lttng_uri *ctrl_uri, + struct lttng_uri *data_uri, unsigned int enable_consumer, + lttng_sock_cred *creds) { int ret; + char *path = NULL; + struct ltt_session *session; + struct consumer_output *consumer; - ret = session_create(name, path, creds->uid, creds->gid); - if (ret != LTTCOMM_OK) { + /* Verify if the session already exist */ + session = session_find_by_name(name); + if (session != NULL) { + ret = LTTCOMM_EXIST_SESS; goto error; } - ret = LTTCOMM_OK; + /* TODO: validate URIs */ -error: - return ret; -} + /* Create default consumer output */ + consumer = consumer_create_output(CONSUMER_DST_LOCAL); + if (consumer == NULL) { + ret = LTTCOMM_FATAL; + goto error; + } + strncpy(consumer->subdir, ctrl_uri->subdir, sizeof(consumer->subdir)); + DBG2("Consumer subdir set to %s", consumer->subdir); -/* - * Command LTTNG_DESTROY_SESSION processed by the client thread. - */ -static int cmd_destroy_session(struct ltt_session *session, char *name) -{ - int ret; + switch (ctrl_uri->dtype) { + case LTTNG_DST_IPV4: + case LTTNG_DST_IPV6: + /* Set control URI into consumer output object */ + ret = consumer_set_network_uri(consumer, ctrl_uri); + if (ret < 0) { + ret = LTTCOMM_FATAL; + goto error; + } - /* Clean kernel session teardown */ - teardown_kernel_session(session); + /* Set data URI into consumer output object */ + ret = consumer_set_network_uri(consumer, data_uri); + if (ret < 0) { + ret = LTTCOMM_FATAL; + goto error; + } + + /* Empty path since the session is network */ + path = ""; + break; + case LTTNG_DST_PATH: + /* Very volatile pointer. Only used for the create session. */ + path = ctrl_uri->dst.path; + strncpy(consumer->dst.trace_path, path, + sizeof(consumer->dst.trace_path)); + break; + } + + /* Set if the consumer is enabled or not */ + consumer->enabled = enable_consumer; + + ret = session_create(name, path, LTTNG_SOCK_GET_UID_CRED(creds), + LTTNG_SOCK_GET_GID_CRED(creds)); + if (ret != LTTCOMM_OK) { + goto consumer_error; + } + + /* Get the newly created session pointer back */ + session = session_find_by_name(name); + assert(session); + + /* Assign consumer to session */ + session->consumer = consumer; + + return LTTCOMM_OK; + +consumer_error: + consumer_destroy_output(consumer); +error: + return ret; +} + +/* + * Command LTTNG_CREATE_SESSION processed by the client thread. + */ +static int cmd_create_session(char *name, char *path, lttng_sock_cred *creds) +{ + int ret; + struct lttng_uri uri; + + /* Zeroed temporary URI */ + memset(&uri, 0, sizeof(uri)); + + uri.dtype = LTTNG_DST_PATH; + uri.utype = LTTNG_URI_DST; + strncpy(uri.dst.path, path, sizeof(uri.dst.path)); + + /* TODO: Strip date-time from path and put it in uri's subdir */ + + ret = cmd_create_session_uri(name, &uri, NULL, 1, creds); + if (ret != LTTCOMM_OK) { + goto error; + } + +error: + return ret; +} + +/* + * Command LTTNG_DESTROY_SESSION processed by the client thread. + */ +static int cmd_destroy_session(struct ltt_session *session, char *name) +{ + int ret; + + /* Safety net */ + assert(session); + + /* Clean kernel session teardown */ + teardown_kernel_session(session); /* UST session teardown */ teardown_ust_session(session); @@ -2903,7 +3550,7 @@ static int cmd_destroy_session(struct ltt_session *session, char *name) */ ret = notify_thread_pipe(kernel_poll_pipe[1]); if (ret < 0) { - perror("write kernel poll pipe"); + PERROR("write kernel poll pipe"); } ret = session_destroy(session); @@ -2961,6 +3608,7 @@ static int cmd_register_consumer(struct ltt_session *session, int domain, char *sock_path) { int ret, sock; + struct consumer_socket *socket; switch (domain) { case LTTNG_DOMAIN_KERNEL: @@ -2976,7 +3624,29 @@ static int cmd_register_consumer(struct ltt_session *session, int domain, goto error; } - session->kernel_session->consumer_fd = sock; + socket = consumer_allocate_socket(sock); + if (socket == NULL) { + ret = LTTCOMM_FATAL; + close(sock); + goto error; + } + + socket->lock = zmalloc(sizeof(pthread_mutex_t)); + if (socket->lock == NULL) { + PERROR("zmalloc pthread mutex"); + ret = LTTCOMM_FATAL; + goto error; + } + pthread_mutex_init(socket->lock, NULL); + + rcu_read_lock(); + consumer_add_socket(socket, session->kernel_session->consumer); + rcu_read_unlock(); + + pthread_mutex_lock(&kconsumer_data.pid_mutex); + kconsumer_data.pid = -1; + pthread_mutex_unlock(&kconsumer_data.pid_mutex); + break; default: /* TODO: Userspace tracing */ @@ -3113,22 +3783,450 @@ error: return ret; } +/* + * Command LTTNG_SET_CONSUMER_URI processed by the client thread. + */ +static int cmd_set_consumer_uri(int domain, struct ltt_session *session, + struct lttng_uri *uri) +{ + int ret; + struct ltt_kernel_session *ksess = session->kernel_session; + struct ltt_ust_session *usess = session->ust_session; + struct consumer_output *consumer; + + /* Can't enable consumer after session started. */ + if (session->enabled) { + ret = LTTCOMM_TRACE_ALREADY_STARTED; + goto error; + } + + switch (domain) { + case LTTNG_DOMAIN_KERNEL: + { + struct lttng_ht_iter iter; + struct consumer_socket *socket; + + /* Code flow error if we don't have a kernel session here. */ + assert(ksess); + + /* Create consumer output if none exists */ + consumer = ksess->tmp_consumer; + if (consumer == NULL) { + consumer = consumer_copy_output(ksess->consumer); + if (consumer == NULL) { + ret = LTTCOMM_FATAL; + goto error; + } + /* Reassign new pointer */ + ksess->tmp_consumer = consumer; + } + + switch (uri->dtype) { + case LTTNG_DST_IPV4: + case LTTNG_DST_IPV6: + DBG2("Setting network URI for kernel session %s", session->name); + + /* Set URI into consumer output object */ + ret = consumer_set_network_uri(consumer, uri); + if (ret < 0) { + ret = LTTCOMM_FATAL; + goto error; + } + + /* On a new subdir, reappend the default trace dir. */ + if (strlen(uri->subdir) != 0) { + strncat(consumer->subdir, DEFAULT_KERNEL_TRACE_DIR, + sizeof(consumer->subdir)); + } + + cds_lfht_for_each_entry(consumer->socks->ht, &iter.iter, + socket, node.node) { + /* Code flow error */ + assert(socket->fd >= 0); + + pthread_mutex_lock(socket->lock); + ret = send_socket_relayd_consumer(domain, session, uri, consumer, + socket->fd); + pthread_mutex_unlock(socket->lock); + if (ret != LTTCOMM_OK) { + goto error; + } + } + + break; + case LTTNG_DST_PATH: + DBG2("Setting trace directory path from URI to %s", uri->dst.path); + memset(consumer->dst.trace_path, 0, + sizeof(consumer->dst.trace_path)); + strncpy(consumer->dst.trace_path, uri->dst.path, + sizeof(consumer->dst.trace_path)); + /* Append default kernel trace dir */ + strncat(consumer->dst.trace_path, DEFAULT_KERNEL_TRACE_DIR, + sizeof(consumer->dst.trace_path)); + break; + } + + /* All good! */ + break; + } + case LTTNG_DOMAIN_UST: + /* Code flow error if we don't have a kernel session here. */ + assert(usess); + + /* Create consumer output if none exists */ + consumer = usess->tmp_consumer; + if (consumer == NULL) { + consumer = consumer_copy_output(usess->consumer); + if (consumer == NULL) { + ret = LTTCOMM_FATAL; + goto error; + } + /* Reassign new pointer */ + usess->tmp_consumer = consumer; + } + + switch (uri->dtype) { + case LTTNG_DST_IPV4: + case LTTNG_DST_IPV6: + { + struct consumer_socket *socket; + + DBG2("Setting network URI for UST session %s", session->name); + + /* Set URI into consumer object */ + ret = consumer_set_network_uri(consumer, uri); + if (ret < 0) { + ret = LTTCOMM_FATAL; + goto error; + } + + /* On a new subdir, reappend the default trace dir. */ + if (strlen(uri->subdir) != 0) { + strncat(consumer->subdir, DEFAULT_UST_TRACE_DIR, + sizeof(consumer->subdir)); + } + + rcu_read_lock(); + socket = consumer_find_socket(uatomic_read(&ust_consumerd64_fd), + consumer); + if (socket != NULL) { + pthread_mutex_lock(socket->lock); + ret = send_socket_relayd_consumer(domain, session, uri, + consumer, socket->fd); + pthread_mutex_unlock(socket->lock); + if (ret != LTTCOMM_OK) { + goto error; + } + } + + socket = consumer_find_socket(uatomic_read(&ust_consumerd32_fd), + consumer); + if (socket != NULL) { + pthread_mutex_lock(socket->lock); + ret = send_socket_relayd_consumer(domain, session, uri, + consumer, socket->fd); + pthread_mutex_unlock(socket->lock); + if (ret != LTTCOMM_OK) { + goto error; + } + } + rcu_read_unlock(); + break; + } + case LTTNG_DST_PATH: + DBG2("Setting trace directory path from URI to %s", uri->dst.path); + memset(consumer->dst.trace_path, 0, + sizeof(consumer->dst.trace_path)); + strncpy(consumer->dst.trace_path, uri->dst.path, + sizeof(consumer->dst.trace_path)); + /* Append default UST trace dir */ + strncat(consumer->dst.trace_path, DEFAULT_UST_TRACE_DIR, + sizeof(consumer->dst.trace_path)); + break; + } + break; + } + + /* All good! */ + ret = LTTCOMM_OK; + +error: + return ret; +} + +/* + * Command LTTNG_DISABLE_CONSUMER processed by the client thread. + */ +static int cmd_disable_consumer(int domain, struct ltt_session *session) +{ + int ret; + struct ltt_kernel_session *ksess = session->kernel_session; + struct ltt_ust_session *usess = session->ust_session; + struct consumer_output *consumer; + + if (session->enabled) { + /* Can't disable consumer on an already started session */ + ret = LTTCOMM_TRACE_ALREADY_STARTED; + goto error; + } + + switch (domain) { + case LTTNG_DOMAIN_KERNEL: + /* Code flow error if we don't have a kernel session here. */ + assert(ksess); + + DBG("Disabling kernel consumer"); + consumer = ksess->consumer; + + break; + case LTTNG_DOMAIN_UST: + /* Code flow error if we don't have a UST session here. */ + assert(usess); + + DBG("Disabling UST consumer"); + consumer = usess->consumer; + + break; + default: + ret = LTTCOMM_UNKNOWN_DOMAIN; + goto error; + } + + assert(consumer); + consumer->enabled = 0; + + /* Success at this point */ + ret = LTTCOMM_OK; + +error: + return ret; +} + +/* + * Command LTTNG_ENABLE_CONSUMER processed by the client thread. + */ +static int cmd_enable_consumer(int domain, struct ltt_session *session) +{ + int ret; + struct ltt_kernel_session *ksess = session->kernel_session; + struct ltt_ust_session *usess = session->ust_session; + struct consumer_output *tmp_out; + + /* Can't enable consumer after session started. */ + if (session->enabled) { + ret = LTTCOMM_TRACE_ALREADY_STARTED; + goto error; + } + + switch (domain) { + case LTTNG_DOMAIN_KERNEL: + /* Code flow error if we don't have a kernel session here. */ + assert(ksess); + + /* + * Check if we have already sent fds to the consumer. In that case, + * the enable-consumer command can't be used because a start trace + * had previously occured. + */ + if (ksess->consumer_fds_sent) { + ret = LTTCOMM_ENABLE_CONSUMER_FAIL; + goto error; + } + + tmp_out = ksess->tmp_consumer; + if (tmp_out == NULL) { + /* No temp. consumer output exists. Using the current one. */ + DBG3("No temporary consumer. Using default"); + ret = LTTCOMM_OK; + goto error; + } + + switch (tmp_out->type) { + case CONSUMER_DST_LOCAL: + DBG2("Consumer output is local. Creating directory(ies)"); + + /* Create directory(ies) */ + ret = run_as_mkdir_recursive(tmp_out->dst.trace_path, + S_IRWXU | S_IRWXG, session->uid, session->gid); + if (ret < 0) { + if (ret != -EEXIST) { + ERR("Trace directory creation error"); + ret = LTTCOMM_FATAL; + goto error; + } + } + break; + case CONSUMER_DST_NET: + DBG2("Consumer output is network. Validating URIs"); + /* Validate if we have both control and data path set. */ + if (!tmp_out->dst.net.control_isset) { + ret = LTTCOMM_URI_CTRL_MISS; + goto error; + } + + if (!tmp_out->dst.net.data_isset) { + ret = LTTCOMM_URI_DATA_MISS; + goto error; + } + + /* Check established network session state */ + if (session->net_handle == 0) { + ret = LTTCOMM_ENABLE_CONSUMER_FAIL; + ERR("Session network handle is not set on enable-consumer"); + goto error; + } + + /* Append default kernel trace dir to subdir */ + strncat(ksess->consumer->subdir, DEFAULT_KERNEL_TRACE_DIR, + sizeof(ksess->consumer->subdir)); + + break; + } + + /* + * @session-lock + * This is race free for now since the session lock is acquired before + * ending up in this function. No other threads can access this kernel + * session without this lock hence freeing the consumer output object + * is valid. + */ + consumer_destroy_output(ksess->consumer); + ksess->consumer = tmp_out; + ksess->tmp_consumer = NULL; + + break; + case LTTNG_DOMAIN_UST: + /* Code flow error if we don't have a UST session here. */ + assert(usess); + + /* + * Check if we have already sent fds to the consumer. In that case, + * the enable-consumer command can't be used because a start trace + * had previously occured. + */ + if (usess->start_trace) { + ret = LTTCOMM_ENABLE_CONSUMER_FAIL; + goto error; + } + + tmp_out = usess->tmp_consumer; + if (tmp_out == NULL) { + /* No temp. consumer output exists. Using the current one. */ + DBG3("No temporary consumer. Using default"); + ret = LTTCOMM_OK; + goto error; + } + + switch (tmp_out->type) { + case CONSUMER_DST_LOCAL: + DBG2("Consumer output is local. Creating directory(ies)"); + + /* Create directory(ies) */ + ret = run_as_mkdir_recursive(tmp_out->dst.trace_path, + S_IRWXU | S_IRWXG, session->uid, session->gid); + if (ret < 0) { + if (ret != -EEXIST) { + ERR("Trace directory creation error"); + ret = LTTCOMM_FATAL; + goto error; + } + } + break; + case CONSUMER_DST_NET: + DBG2("Consumer output is network. Validating URIs"); + /* Validate if we have both control and data path set. */ + if (!tmp_out->dst.net.control_isset) { + ret = LTTCOMM_URI_CTRL_MISS; + goto error; + } + + if (!tmp_out->dst.net.data_isset) { + ret = LTTCOMM_URI_DATA_MISS; + goto error; + } + + /* Check established network session state */ + if (session->net_handle == 0) { + ret = LTTCOMM_ENABLE_CONSUMER_FAIL; + DBG2("Session network handle is not set on enable-consumer"); + goto error; + } + + if (tmp_out->net_seq_index == -1) { + ret = LTTCOMM_ENABLE_CONSUMER_FAIL; + DBG2("Network index is not set on the consumer"); + goto error; + } + + /* Append default kernel trace dir to subdir */ + strncat(usess->consumer->subdir, DEFAULT_UST_TRACE_DIR, + sizeof(usess->consumer->subdir)); + + break; + } + + /* + * @session-lock + * This is race free for now since the session lock is acquired before + * ending up in this function. No other threads can access this kernel + * session without this lock hence freeing the consumer output object + * is valid. + */ + consumer_destroy_output(usess->consumer); + usess->consumer = tmp_out; + usess->tmp_consumer = NULL; + + break; + } + + /* Success at this point */ + ret = LTTCOMM_OK; + +error: + return ret; +} + /* * Process the command requested by the lttng client within the command * context structure. This function make sure that the return structure (llm) * is set and ready for transmission before returning. * * Return any error encountered or 0 for success. + * + * "sock" is only used for special-case var. len data. */ -static int process_client_msg(struct command_ctx *cmd_ctx) +static int process_client_msg(struct command_ctx *cmd_ctx, int sock, + int *sock_error) { int ret = LTTCOMM_OK; int need_tracing_session = 1; + int need_domain; DBG("Processing client command %d", cmd_ctx->lsm->cmd_type); - if (opt_no_kernel && cmd_ctx->lsm->domain.type == LTTNG_DOMAIN_KERNEL) { - ret = LTTCOMM_KERN_NA; + *sock_error = 0; + + switch (cmd_ctx->lsm->cmd_type) { + case LTTNG_CREATE_SESSION: + case LTTNG_CREATE_SESSION_URI: + case LTTNG_DESTROY_SESSION: + case LTTNG_LIST_SESSIONS: + case LTTNG_LIST_DOMAINS: + case LTTNG_START_TRACE: + case LTTNG_STOP_TRACE: + need_domain = 0; + break; + default: + need_domain = 1; + } + + if (opt_no_kernel && need_domain + && cmd_ctx->lsm->domain.type == LTTNG_DOMAIN_KERNEL) { + if (!is_root) { + ret = LTTCOMM_NEED_ROOT_SESSIOND; + } else { + ret = LTTCOMM_KERN_NA; + } goto error; } @@ -3140,6 +4238,7 @@ static int process_client_msg(struct command_ctx *cmd_ctx) switch(cmd_ctx->lsm->cmd_type) { case LTTNG_LIST_SESSIONS: case LTTNG_LIST_TRACEPOINTS: + case LTTNG_LIST_TRACEPOINT_FIELDS: case LTTNG_LIST_DOMAINS: case LTTNG_LIST_CHANNELS: case LTTNG_LIST_EVENTS: @@ -3155,17 +4254,23 @@ static int process_client_msg(struct command_ctx *cmd_ctx) /* Commands that DO NOT need a session. */ switch (cmd_ctx->lsm->cmd_type) { - case LTTNG_CALIBRATE: case LTTNG_CREATE_SESSION: + case LTTNG_CREATE_SESSION_URI: + case LTTNG_CALIBRATE: case LTTNG_LIST_SESSIONS: case LTTNG_LIST_TRACEPOINTS: + case LTTNG_LIST_TRACEPOINT_FIELDS: need_tracing_session = 0; break; default: DBG("Getting session %s by name", cmd_ctx->lsm->session.name); + /* + * We keep the session list lock across _all_ commands + * for now, because the per-session lock does not + * handle teardown properly. + */ session_lock_list(); cmd_ctx->session = session_find_by_name(cmd_ctx->lsm->session.name); - session_unlock_list(); if (cmd_ctx->session == NULL) { if (cmd_ctx->lsm->session.name != NULL) { ret = LTTCOMM_SESS_NOT_FOUND; @@ -3181,18 +4286,21 @@ static int process_client_msg(struct command_ctx *cmd_ctx) break; } + if (!need_domain) { + goto skip_domain; + } /* * Check domain type for specific "pre-action". */ switch (cmd_ctx->lsm->domain.type) { case LTTNG_DOMAIN_KERNEL: if (!is_root) { - ret = LTTCOMM_KERN_NA; + ret = LTTCOMM_NEED_ROOT_SESSIOND; goto error; } /* Kernel tracer check */ - if (kernel_tracer_fd == 0) { + if (kernel_tracer_fd == -1) { /* Basically, load kernel tracer modules */ ret = init_kernel_tracer(); if (ret != 0) { @@ -3200,8 +4308,16 @@ static int process_client_msg(struct command_ctx *cmd_ctx) } } + /* Consumer is in an ERROR state. Report back to client */ + if (uatomic_read(&kernel_consumerd_state) == CONSUMER_ERROR) { + ret = LTTCOMM_NO_KERNCONSUMERD; + goto error; + } + /* Need a session for kernel command */ if (need_tracing_session) { + struct consumer_socket *socket; + if (cmd_ctx->session->kernel_session == NULL) { ret = create_kernel_session(cmd_ctx->session); if (ret < 0) { @@ -3220,14 +4336,44 @@ static int process_client_msg(struct command_ctx *cmd_ctx) ret = LTTCOMM_KERN_CONSUMER_FAIL; goto error; } + uatomic_set(&kernel_consumerd_state, CONSUMER_STARTED); } else { pthread_mutex_unlock(&kconsumer_data.pid_mutex); } + + /* Set kernel consumer socket fd */ + if (kconsumer_data.cmd_sock >= 0) { + rcu_read_lock(); + socket = consumer_find_socket(kconsumer_data.cmd_sock, + cmd_ctx->session->kernel_session->consumer); + rcu_read_unlock(); + if (socket == NULL) { + socket = consumer_allocate_socket(kconsumer_data.cmd_sock); + if (socket == NULL) { + goto error; + } + + socket->lock = &kconsumer_data.lock; + rcu_read_lock(); + consumer_add_socket(socket, + cmd_ctx->session->kernel_session->consumer); + rcu_read_unlock(); + } + } } + break; case LTTNG_DOMAIN_UST: { + /* Consumer is in an ERROR state. Report back to client */ + if (uatomic_read(&ust_consumerd_state) == CONSUMER_ERROR) { + ret = LTTCOMM_NO_USTCONSUMERD; + goto error; + } + if (need_tracing_session) { + struct consumer_socket *socket; + if (cmd_ctx->session->ust_session == NULL) { ret = create_ust_session(cmd_ctx->session, &cmd_ctx->lsm->domain); @@ -3235,6 +4381,7 @@ static int process_client_msg(struct command_ctx *cmd_ctx) goto error; } } + /* Start the UST consumer daemons */ /* 64-bit */ pthread_mutex_lock(&ustconsumer64_data.pid_mutex); @@ -3245,14 +4392,40 @@ static int process_client_msg(struct command_ctx *cmd_ctx) ret = start_consumerd(&ustconsumer64_data); if (ret < 0) { ret = LTTCOMM_UST_CONSUMER64_FAIL; - ust_consumerd64_fd = -EINVAL; + uatomic_set(&ust_consumerd64_fd, -EINVAL); goto error; } - ust_consumerd64_fd = ustconsumer64_data.cmd_sock; + uatomic_set(&ust_consumerd64_fd, ustconsumer64_data.cmd_sock); + uatomic_set(&ust_consumerd_state, CONSUMER_STARTED); } else { pthread_mutex_unlock(&ustconsumer64_data.pid_mutex); } + + /* + * Setup socket for consumer 64 bit. No need for atomic access + * since it was set above and can ONLY be set in this thread. + */ + if (ust_consumerd64_fd >= 0) { + rcu_read_lock(); + socket = consumer_find_socket(uatomic_read(&ust_consumerd64_fd), + cmd_ctx->session->ust_session->consumer); + rcu_read_unlock(); + if (socket == NULL) { + socket = consumer_allocate_socket(ust_consumerd64_fd); + if (socket == NULL) { + goto error; + } + socket->lock = &ustconsumer32_data.lock; + + rcu_read_lock(); + consumer_add_socket(socket, + cmd_ctx->session->ust_session->consumer); + rcu_read_unlock(); + } + DBG3("UST consumer 64 bit socket set to %d", socket->fd); + } + /* 32-bit */ if (consumerd32_bin[0] != '\0' && ustconsumer32_data.pid == 0 && @@ -3261,19 +4434,65 @@ static int process_client_msg(struct command_ctx *cmd_ctx) ret = start_consumerd(&ustconsumer32_data); if (ret < 0) { ret = LTTCOMM_UST_CONSUMER32_FAIL; - ust_consumerd32_fd = -EINVAL; + uatomic_set(&ust_consumerd32_fd, -EINVAL); goto error; } - ust_consumerd32_fd = ustconsumer32_data.cmd_sock; + + uatomic_set(&ust_consumerd32_fd, ustconsumer32_data.cmd_sock); + uatomic_set(&ust_consumerd_state, CONSUMER_STARTED); } else { pthread_mutex_unlock(&ustconsumer32_data.pid_mutex); } + + /* + * Setup socket for consumer 64 bit. No need for atomic access + * since it was set above and can ONLY be set in this thread. + */ + if (ust_consumerd32_fd >= 0) { + rcu_read_lock(); + socket = consumer_find_socket(uatomic_read(&ust_consumerd64_fd), + cmd_ctx->session->ust_session->consumer); + rcu_read_unlock(); + if (socket == NULL) { + socket = consumer_allocate_socket(ust_consumerd32_fd); + if (socket == NULL) { + goto error; + } + socket->lock = &ustconsumer32_data.lock; + + rcu_read_lock(); + consumer_add_socket(socket, + cmd_ctx->session->ust_session->consumer); + rcu_read_unlock(); + } + DBG3("UST consumer 32 bit socket set to %d", socket->fd); + } } break; } default: break; } +skip_domain: + + /* Validate consumer daemon state when start/stop trace command */ + if (cmd_ctx->lsm->cmd_type == LTTNG_START_TRACE || + cmd_ctx->lsm->cmd_type == LTTNG_STOP_TRACE) { + switch (cmd_ctx->lsm->domain.type) { + case LTTNG_DOMAIN_UST: + if (uatomic_read(&ust_consumerd_state) != CONSUMER_STARTED) { + ret = LTTCOMM_NO_USTCONSUMERD; + goto error; + } + break; + case LTTNG_DOMAIN_KERNEL: + if (uatomic_read(&kernel_consumerd_state) != CONSUMER_STARTED) { + ret = LTTCOMM_NO_KERNCONSUMERD; + goto error; + } + break; + } + } /* * Check that the UID or GID match that of the tracing session. @@ -3281,7 +4500,8 @@ static int process_client_msg(struct command_ctx *cmd_ctx) */ if (need_tracing_session) { if (!session_access_ok(cmd_ctx->session, - cmd_ctx->creds.uid, cmd_ctx->creds.gid)) { + LTTNG_SOCK_GET_UID_CRED(&cmd_ctx->creds), + LTTNG_SOCK_GET_GID_CRED(&cmd_ctx->creds))) { ret = LTTCOMM_EPERM; goto error; } @@ -3308,7 +4528,6 @@ static int process_client_msg(struct command_ctx *cmd_ctx) ret = cmd_disable_event(cmd_ctx->session, cmd_ctx->lsm->domain.type, cmd_ctx->lsm->u.disable.channel_name, cmd_ctx->lsm->u.disable.name); - ret = LTTCOMM_OK; break; } case LTTNG_DISABLE_ALL_EVENT: @@ -3319,12 +4538,22 @@ static int process_client_msg(struct command_ctx *cmd_ctx) cmd_ctx->lsm->u.disable.channel_name); break; } + case LTTNG_DISABLE_CONSUMER: + { + ret = cmd_disable_consumer(cmd_ctx->lsm->domain.type, cmd_ctx->session); + break; + } case LTTNG_ENABLE_CHANNEL: { ret = cmd_enable_channel(cmd_ctx->session, cmd_ctx->lsm->domain.type, &cmd_ctx->lsm->u.channel.chan); break; } + case LTTNG_ENABLE_CONSUMER: + { + ret = cmd_enable_consumer(cmd_ctx->lsm->domain.type, cmd_ctx->session); + break; + } case LTTNG_ENABLE_EVENT: { ret = cmd_enable_event(cmd_ctx->session, cmd_ctx->lsm->domain.type, @@ -3336,19 +4565,49 @@ static int process_client_msg(struct command_ctx *cmd_ctx) { DBG("Enabling all events"); - ret = cmd_enable_event_all(cmd_ctx->session, cmd_ctx->lsm->domain.type, - cmd_ctx->lsm->u.enable.channel_name, - cmd_ctx->lsm->u.enable.event.type); + ret = cmd_enable_event_all(cmd_ctx->session, cmd_ctx->lsm->domain.type, + cmd_ctx->lsm->u.enable.channel_name, + cmd_ctx->lsm->u.enable.event.type); + break; + } + case LTTNG_LIST_TRACEPOINTS: + { + struct lttng_event *events; + ssize_t nb_events; + + nb_events = cmd_list_tracepoints(cmd_ctx->lsm->domain.type, &events); + if (nb_events < 0) { + ret = -nb_events; + goto error; + } + + /* + * Setup lttng message with payload size set to the event list size in + * bytes and then copy list into the llm payload. + */ + ret = setup_lttng_msg(cmd_ctx, sizeof(struct lttng_event) * nb_events); + if (ret < 0) { + free(events); + goto setup_error; + } + + /* Copy event list into message payload */ + memcpy(cmd_ctx->llm->payload, events, + sizeof(struct lttng_event) * nb_events); + + free(events); + + ret = LTTCOMM_OK; break; } - case LTTNG_LIST_TRACEPOINTS: + case LTTNG_LIST_TRACEPOINT_FIELDS: { - struct lttng_event *events; - ssize_t nb_events; + struct lttng_event_field *fields; + ssize_t nb_fields; - nb_events = cmd_list_tracepoints(cmd_ctx->lsm->domain.type, &events); - if (nb_events < 0) { - ret = -nb_events; + nb_fields = cmd_list_tracepoint_fields(cmd_ctx->lsm->domain.type, &fields); + if (nb_fields < 0) { + ret = -nb_fields; goto error; } @@ -3356,21 +4615,27 @@ static int process_client_msg(struct command_ctx *cmd_ctx) * Setup lttng message with payload size set to the event list size in * bytes and then copy list into the llm payload. */ - ret = setup_lttng_msg(cmd_ctx, sizeof(struct lttng_event) * nb_events); + ret = setup_lttng_msg(cmd_ctx, sizeof(struct lttng_event_field) * nb_fields); if (ret < 0) { - free(events); + free(fields); goto setup_error; } /* Copy event list into message payload */ - memcpy(cmd_ctx->llm->payload, events, - sizeof(struct lttng_event) * nb_events); + memcpy(cmd_ctx->llm->payload, fields, + sizeof(struct lttng_event_field) * nb_fields); - free(events); + free(fields); ret = LTTCOMM_OK; break; } + case LTTNG_SET_CONSUMER_URI: + { + ret = cmd_set_consumer_uri(cmd_ctx->lsm->domain.type, cmd_ctx->session, + &cmd_ctx->lsm->u.uri); + break; + } case LTTNG_START_TRACE: { ret = cmd_start_trace(cmd_ctx->session); @@ -3387,10 +4652,23 @@ static int process_client_msg(struct command_ctx *cmd_ctx) cmd_ctx->lsm->session.path, &cmd_ctx->creds); break; } + case LTTNG_CREATE_SESSION_URI: + { + ret = cmd_create_session_uri(cmd_ctx->lsm->session.name, + &cmd_ctx->lsm->u.create_uri.ctrl_uri, + &cmd_ctx->lsm->u.create_uri.data_uri, + cmd_ctx->lsm->u.create_uri.enable_consumer, &cmd_ctx->creds); + break; + } case LTTNG_DESTROY_SESSION: { ret = cmd_destroy_session(cmd_ctx->session, cmd_ctx->lsm->session.name); + /* + * Set session to NULL so we do not unlock it after + * free. + */ + cmd_ctx->session = NULL; break; } case LTTNG_LIST_DOMAINS: @@ -3420,7 +4698,7 @@ static int process_client_msg(struct command_ctx *cmd_ctx) } case LTTNG_LIST_CHANNELS: { - size_t nb_chan; + int nb_chan; struct lttng_channel *channels; nb_chan = cmd_list_channels(cmd_ctx->lsm->domain.type, @@ -3475,12 +4753,10 @@ static int process_client_msg(struct command_ctx *cmd_ctx) unsigned int nr_sessions; session_lock_list(); - nr_sessions = lttng_sessions_count(cmd_ctx->creds.uid, cmd_ctx->creds.gid); - if (nr_sessions == 0) { - ret = LTTCOMM_NO_SESSION; - session_unlock_list(); - goto error; - } + nr_sessions = lttng_sessions_count( + LTTNG_SOCK_GET_UID_CRED(&cmd_ctx->creds), + LTTNG_SOCK_GET_GID_CRED(&cmd_ctx->creds)); + ret = setup_lttng_msg(cmd_ctx, sizeof(struct lttng_session) * nr_sessions); if (ret < 0) { session_unlock_list(); @@ -3489,7 +4765,8 @@ static int process_client_msg(struct command_ctx *cmd_ctx) /* Filled the session array */ list_lttng_sessions((struct lttng_session *)(cmd_ctx->llm->payload), - cmd_ctx->creds.uid, cmd_ctx->creds.gid); + LTTNG_SOCK_GET_UID_CRED(&cmd_ctx->creds), + LTTNG_SOCK_GET_GID_CRED(&cmd_ctx->creds)); session_unlock_list(); @@ -3508,6 +4785,43 @@ static int process_client_msg(struct command_ctx *cmd_ctx) cmd_ctx->lsm->u.reg.path); break; } + case LTTNG_SET_FILTER: + { + struct lttng_filter_bytecode *bytecode; + + if (cmd_ctx->lsm->u.filter.bytecode_len > 65336) { + ret = LTTCOMM_FILTER_INVAL; + goto error; + } + bytecode = zmalloc(cmd_ctx->lsm->u.filter.bytecode_len); + if (!bytecode) { + ret = LTTCOMM_FILTER_NOMEM; + goto error; + } + /* Receive var. len. data */ + DBG("Receiving var len data from client ..."); + ret = lttcomm_recv_unix_sock(sock, bytecode, + cmd_ctx->lsm->u.filter.bytecode_len); + if (ret <= 0) { + DBG("Nothing recv() from client var len data... continuing"); + *sock_error = 1; + ret = LTTCOMM_FILTER_INVAL; + goto error; + } + + if (bytecode->len + sizeof(*bytecode) + != cmd_ctx->lsm->u.filter.bytecode_len) { + free(bytecode); + ret = LTTCOMM_FILTER_INVAL; + goto error; + } + + ret = cmd_set_filter(cmd_ctx->session, cmd_ctx->lsm->domain.type, + cmd_ctx->lsm->u.filter.channel_name, + cmd_ctx->lsm->u.filter.event_name, + bytecode); + break; + } default: ret = LTTCOMM_UND; break; @@ -3526,17 +4840,202 @@ setup_error: if (cmd_ctx->session) { session_unlock(cmd_ctx->session); } + if (need_tracing_session) { + session_unlock_list(); + } init_setup_error: return ret; } +/* + * Thread managing health check socket. + */ +static void *thread_manage_health(void *data) +{ + int sock = -1, new_sock, ret, i, pollfd, err = -1; + uint32_t revents, nb_fd; + struct lttng_poll_event events; + struct lttcomm_health_msg msg; + struct lttcomm_health_data reply; + + DBG("[thread] Manage health check started"); + + rcu_register_thread(); + + /* Create unix socket */ + sock = lttcomm_create_unix_sock(health_unix_sock_path); + if (sock < 0) { + ERR("Unable to create health check Unix socket"); + ret = -1; + goto error; + } + + ret = lttcomm_listen_unix_sock(sock); + if (ret < 0) { + goto error; + } + + /* + * Pass 2 as size here for the thread quit pipe and client_sock. Nothing + * more will be added to this poll set. + */ + ret = create_thread_poll_set(&events, 2); + if (ret < 0) { + goto error; + } + + /* Add the application registration socket */ + ret = lttng_poll_add(&events, sock, LPOLLIN | LPOLLPRI); + if (ret < 0) { + goto error; + } + + while (1) { + DBG("Health check ready"); + + nb_fd = LTTNG_POLL_GETNB(&events); + + /* Inifinite blocking call, waiting for transmission */ +restart: + ret = lttng_poll_wait(&events, -1); + if (ret < 0) { + /* + * Restart interrupted system call. + */ + if (errno == EINTR) { + goto restart; + } + goto error; + } + + for (i = 0; i < nb_fd; i++) { + /* Fetch once the poll data */ + revents = LTTNG_POLL_GETEV(&events, i); + pollfd = LTTNG_POLL_GETFD(&events, i); + + /* Thread quit pipe has been closed. Killing thread. */ + ret = check_thread_quit_pipe(pollfd, revents); + if (ret) { + err = 0; + goto exit; + } + + /* Event on the registration socket */ + if (pollfd == sock) { + if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) { + ERR("Health socket poll error"); + goto error; + } + } + } + + new_sock = lttcomm_accept_unix_sock(sock); + if (new_sock < 0) { + goto error; + } + + DBG("Receiving data from client for health..."); + ret = lttcomm_recv_unix_sock(new_sock, (void *)&msg, sizeof(msg)); + if (ret <= 0) { + DBG("Nothing recv() from client... continuing"); + ret = close(new_sock); + if (ret) { + PERROR("close"); + } + new_sock = -1; + continue; + } + + rcu_thread_online(); + + switch (msg.component) { + case LTTNG_HEALTH_CMD: + reply.ret_code = health_check_state(&health_thread_cmd); + break; + case LTTNG_HEALTH_APP_MANAGE: + reply.ret_code = health_check_state(&health_thread_app_manage); + break; + case LTTNG_HEALTH_APP_REG: + reply.ret_code = health_check_state(&health_thread_app_reg); + break; + case LTTNG_HEALTH_KERNEL: + reply.ret_code = health_check_state(&health_thread_kernel); + break; + case LTTNG_HEALTH_CONSUMER: + reply.ret_code = check_consumer_health(); + break; + case LTTNG_HEALTH_ALL: + reply.ret_code = + health_check_state(&health_thread_app_manage) && + health_check_state(&health_thread_app_reg) && + health_check_state(&health_thread_cmd) && + health_check_state(&health_thread_kernel) && + check_consumer_health(); + break; + default: + reply.ret_code = LTTCOMM_UND; + break; + } + + /* + * Flip ret value since 0 is a success and 1 indicates a bad health for + * the client where in the sessiond it is the opposite. Again, this is + * just to make things easier for us poor developer which enjoy a lot + * lazyness. + */ + if (reply.ret_code == 0 || reply.ret_code == 1) { + reply.ret_code = !reply.ret_code; + } + + DBG2("Health check return value %d", reply.ret_code); + + ret = send_unix_sock(new_sock, (void *) &reply, sizeof(reply)); + if (ret < 0) { + ERR("Failed to send health data back to client"); + } + + /* End of transmission */ + ret = close(new_sock); + if (ret) { + PERROR("close"); + } + new_sock = -1; + } + +exit: +error: + if (err) { + ERR("Health error occurred in %s", __func__); + } + DBG("Health check thread dying"); + unlink(health_unix_sock_path); + if (sock >= 0) { + ret = close(sock); + if (ret) { + PERROR("close"); + } + } + if (new_sock >= 0) { + ret = close(new_sock); + if (ret) { + PERROR("close"); + } + } + + lttng_poll_clean(&events); + + rcu_unregister_thread(); + return NULL; +} + /* * This thread manage all clients request using the unix client socket for * communication. */ static void *thread_manage_clients(void *data) { - int sock = 0, ret, i, pollfd; + int sock = -1, ret, i, pollfd, err = -1; + int sock_error; uint32_t revents, nb_fd; struct command_ctx *cmd_ctx = NULL; struct lttng_poll_event events; @@ -3545,6 +5044,8 @@ static void *thread_manage_clients(void *data) rcu_register_thread(); + health_code_update(&health_thread_cmd); + ret = lttcomm_listen_unix_sock(client_sock); if (ret < 0) { goto error; @@ -3572,14 +5073,25 @@ static void *thread_manage_clients(void *data) kill(ppid, SIGUSR1); } + health_code_update(&health_thread_cmd); + while (1) { DBG("Accepting client command ..."); nb_fd = LTTNG_POLL_GETNB(&events); /* Inifinite blocking call, waiting for transmission */ + restart: + health_poll_update(&health_thread_cmd); ret = lttng_poll_wait(&events, -1); + health_poll_update(&health_thread_cmd); if (ret < 0) { + /* + * Restart interrupted system call. + */ + if (errno == EINTR) { + goto restart; + } goto error; } @@ -3588,10 +5100,13 @@ static void *thread_manage_clients(void *data) revents = LTTNG_POLL_GETEV(&events, i); pollfd = LTTNG_POLL_GETFD(&events, i); + health_code_update(&health_thread_cmd); + /* Thread quit pipe has been closed. Killing thread. */ ret = check_thread_quit_pipe(pollfd, revents); if (ret) { - goto error; + err = 0; + goto exit; } /* Event on the registration socket */ @@ -3605,6 +5120,8 @@ static void *thread_manage_clients(void *data) DBG("Wait for client response"); + health_code_update(&health_thread_cmd); + sock = lttcomm_accept_unix_sock(client_sock); if (sock < 0) { goto error; @@ -3619,20 +5136,22 @@ static void *thread_manage_clients(void *data) /* Allocate context command to process the client request */ cmd_ctx = zmalloc(sizeof(struct command_ctx)); if (cmd_ctx == NULL) { - perror("zmalloc cmd_ctx"); + PERROR("zmalloc cmd_ctx"); goto error; } /* Allocate data buffer for reception */ cmd_ctx->lsm = zmalloc(sizeof(struct lttcomm_session_msg)); if (cmd_ctx->lsm == NULL) { - perror("zmalloc cmd_ctx->lsm"); + PERROR("zmalloc cmd_ctx->lsm"); goto error; } cmd_ctx->llm = NULL; cmd_ctx->session = NULL; + health_code_update(&health_thread_cmd); + /* * Data is received from the lttng client. The struct * lttcomm_session_msg (lsm) contains the command and data request of @@ -3643,11 +5162,17 @@ static void *thread_manage_clients(void *data) sizeof(struct lttcomm_session_msg), &cmd_ctx->creds); if (ret <= 0) { DBG("Nothing recv() from client... continuing"); - close(sock); - free(cmd_ctx); + ret = close(sock); + if (ret) { + PERROR("close"); + } + sock = -1; + clean_command_ctx(&cmd_ctx); continue; } + health_code_update(&health_thread_cmd); + // TODO: Validate cmd_ctx including sanity check for // security purpose. @@ -3658,18 +5183,29 @@ static void *thread_manage_clients(void *data) * informations for the client. The command context struct contains * everything this function may needs. */ - ret = process_client_msg(cmd_ctx); + ret = process_client_msg(cmd_ctx, sock, &sock_error); rcu_thread_offline(); if (ret < 0) { + if (sock_error) { + ret = close(sock); + if (ret) { + PERROR("close"); + } + sock = -1; + } /* * TODO: Inform client somehow of the fatal error. At * this point, ret < 0 means that a zmalloc failed - * (ENOMEM). Error detected but still accept command. + * (ENOMEM). Error detected but still accept + * command, unless a socket error has been + * detected. */ clean_command_ctx(&cmd_ctx); continue; } + health_code_update(&health_thread_cmd); + DBG("Sending response (size: %d, retcode: %s)", cmd_ctx->lttng_msg_size, lttng_strerror(-cmd_ctx->llm->ret_code)); @@ -3679,16 +5215,39 @@ static void *thread_manage_clients(void *data) } /* End of transmission */ - close(sock); + ret = close(sock); + if (ret) { + PERROR("close"); + } + sock = -1; clean_command_ctx(&cmd_ctx); + + health_code_update(&health_thread_cmd); } +exit: error: + if (err) { + health_error(&health_thread_cmd); + ERR("Health error occurred in %s", __func__); + } + health_exit(&health_thread_cmd); + DBG("Client thread dying"); unlink(client_unix_sock_path); - close(client_sock); - close(sock); + if (client_sock >= 0) { + ret = close(client_sock); + if (ret) { + PERROR("close"); + } + } + if (sock >= 0) { + ret = close(sock); + if (ret) { + PERROR("close"); + } + } lttng_poll_clean(&events); clean_command_ctx(&cmd_ctx); @@ -3817,11 +5376,11 @@ static int parse_args(int argc, char **argv) opt_no_kernel = 1; break; case 'q': - opt_quiet = 1; + lttng_opt_quiet = 1; break; case 'v': /* Verbose level can increase using multiple -v */ - opt_verbose += 1; + lttng_opt_verbose += 1; break; case 'Z': opt_verbose_consumer += 1; @@ -3872,7 +5431,7 @@ static int init_daemon_socket(void) ret = chmod(client_unix_sock_path, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); if (ret < 0) { ERR("Set file permissions failed: %s", client_unix_sock_path); - perror("chmod"); + PERROR("chmod"); goto end; } @@ -3889,7 +5448,7 @@ static int init_daemon_socket(void) S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); if (ret < 0) { ERR("Set file permissions failed: %s", apps_unix_sock_path); - perror("chmod"); + PERROR("chmod"); goto end; } @@ -3904,70 +5463,74 @@ end: */ static int check_existing_daemon(void) { - if (access(client_unix_sock_path, F_OK) < 0 && - access(apps_unix_sock_path, F_OK) < 0) { - return 0; - } - /* Is there anybody out there ? */ if (lttng_session_daemon_alive()) { return -EEXIST; - } else { - return 0; } + + return 0; } /* * Set the tracing group gid onto the client socket. * * Race window between mkdir and chown is OK because we are going from more - * permissive (root.root) to les permissive (root.tracing). + * permissive (root.root) to less permissive (root.tracing). */ static int set_permissions(char *rundir) { int ret; gid_t gid; - gid = allowed_group(); - if (gid < 0) { + ret = allowed_group(); + if (ret < 0) { WARN("No tracing group detected"); ret = 0; goto end; } + gid = ret; + /* Set lttng run dir */ ret = chown(rundir, 0, gid); if (ret < 0) { ERR("Unable to set group on %s", rundir); - perror("chown"); + PERROR("chown"); + } + + /* Ensure tracing group can search the run dir */ + ret = chmod(rundir, S_IRWXU | S_IXGRP | S_IXOTH); + if (ret < 0) { + ERR("Unable to set permissions on %s", rundir); + PERROR("chmod"); } /* lttng client socket path */ ret = chown(client_unix_sock_path, 0, gid); if (ret < 0) { ERR("Unable to set group on %s", client_unix_sock_path); - perror("chown"); + PERROR("chown"); } /* kconsumer error socket path */ ret = chown(kconsumer_data.err_unix_sock_path, 0, gid); if (ret < 0) { ERR("Unable to set group on %s", kconsumer_data.err_unix_sock_path); - perror("chown"); + PERROR("chown"); } /* 64-bit ustconsumer error socket path */ ret = chown(ustconsumer64_data.err_unix_sock_path, 0, gid); if (ret < 0) { ERR("Unable to set group on %s", ustconsumer64_data.err_unix_sock_path); - perror("chown"); + PERROR("chown"); } /* 32-bit ustconsumer compat32 error socket path */ ret = chown(ustconsumer32_data.err_unix_sock_path, 0, gid); if (ret < 0) { ERR("Unable to set group on %s", ustconsumer32_data.err_unix_sock_path); - perror("chown"); + PERROR("chown"); } DBG("All permissions are set"); @@ -3976,22 +5539,6 @@ end: return ret; } -/* - * Create the pipe used to wake up the kernel thread. - */ -static int create_kernel_poll_pipe(void) -{ - return pipe2(kernel_poll_pipe, O_CLOEXEC); -} - -/* - * Create the application command pipe to wake thread_manage_apps. - */ -static int create_apps_cmd_pipe(void) -{ - return pipe2(apps_cmd_pipe, O_CLOEXEC); -} - /* * Create the lttng run directory needed for all global sockets and pipe. */ @@ -4001,7 +5548,7 @@ static int create_lttng_rundir(const char *rundir) DBG3("Creating LTTng run directory: %s", rundir); - ret = mkdir(rundir, S_IRWXU | S_IRWXG ); + ret = mkdir(rundir, S_IRWXU); if (ret < 0) { if (errno != EEXIST) { ERR("Unable to create %s", rundir); @@ -4043,13 +5590,14 @@ static int set_consumer_sockets(struct consumer_data *consumer_data, DBG2("Creating consumer directory: %s", path); - ret = mkdir(path, S_IRWXU | S_IRWXG); + ret = mkdir(path, S_IRWXU); if (ret < 0) { if (errno != EEXIST) { + PERROR("mkdir"); ERR("Failed to create %s", path); goto error; } - ret = 0; + ret = -1; } /* Create the kconsumerd error unix socket */ @@ -4110,7 +5658,7 @@ static int set_signal_handler(void) sigset_t sigset; if ((ret = sigemptyset(&sigset)) < 0) { - perror("sigemptyset"); + PERROR("sigemptyset"); return ret; } @@ -4118,17 +5666,17 @@ static int set_signal_handler(void) sa.sa_mask = sigset; sa.sa_flags = 0; if ((ret = sigaction(SIGTERM, &sa, NULL)) < 0) { - perror("sigaction"); + PERROR("sigaction"); return ret; } if ((ret = sigaction(SIGINT, &sa, NULL)) < 0) { - perror("sigaction"); + PERROR("sigaction"); return ret; } if ((ret = sigaction(SIGPIPE, &sa, NULL)) < 0) { - perror("sigaction"); + PERROR("sigaction"); return ret; } @@ -4152,7 +5700,7 @@ static void set_ulimit(void) ret = setrlimit(RLIMIT_NOFILE, &lim); if (ret < 0) { - perror("failed to set open files limit"); + PERROR("failed to set open files limit"); } } @@ -4169,11 +5717,6 @@ int main(int argc, char **argv) rcu_register_thread(); - /* Create thread quit pipe */ - if ((ret = init_thread_quit_pipe()) < 0) { - goto error; - } - setup_consumerd_path(); /* Parse arguments */ @@ -4184,11 +5727,31 @@ int main(int argc, char **argv) /* Daemonize */ if (opt_daemon) { + int i; + + /* + * fork + * child: setsid, close FD 0, 1, 2, chdir / + * parent: exit (if fork is successful) + */ ret = daemon(0, 0); if (ret < 0) { - perror("daemon"); + PERROR("daemon"); goto error; } + /* + * We are in the child. Make sure all other file + * descriptors are closed, in case we are called with + * more opened file descriptors than the standard ones. + */ + for (i = 3; i < sysconf(_SC_OPEN_MAX); i++) { + (void) close(i); + } + } + + /* Create thread quit pipe */ + if ((ret = init_thread_quit_pipe()) < 0) { + goto error; } /* Check if daemon is UID = 0 */ @@ -4219,6 +5782,11 @@ int main(int argc, char **argv) DEFAULT_GLOBAL_APPS_WAIT_SHM_PATH); } + if (strlen(health_unix_sock_path) == 0) { + snprintf(health_unix_sock_path, sizeof(health_unix_sock_path), + DEFAULT_GLOBAL_HEALTH_UNIX_SOCK); + } + /* Setup kernel consumerd path */ snprintf(kconsumer_data.err_unix_sock_path, PATH_MAX, DEFAULT_KCONSUMERD_ERR_SOCK_PATH, rundir); @@ -4269,8 +5837,18 @@ int main(int argc, char **argv) snprintf(wait_shm_path, PATH_MAX, DEFAULT_HOME_APPS_WAIT_SHM_PATH, geteuid()); } + + /* Set health check Unix path */ + if (strlen(health_unix_sock_path) == 0) { + snprintf(health_unix_sock_path, sizeof(health_unix_sock_path), + DEFAULT_HOME_HEALTH_UNIX_SOCK, home_path); + } } + /* Set consumer initial state */ + kernel_consumerd_state = CONSUMER_STOPPED; + ust_consumerd_state = CONSUMER_STOPPED; + DBG("Client socket path %s", client_unix_sock_path); DBG("Application socket path %s", apps_unix_sock_path); DBG("LTTng run directory path: %s", rundir); @@ -4309,6 +5887,12 @@ int main(int argc, char **argv) goto error; } + /* + * Init UST app hash table. Alloc hash table before this point since + * cleanup() can get called after that point. + */ + ust_app_ht_alloc(); + /* After this point, we can safely call cleanup() with "goto exit" */ /* @@ -4331,6 +5915,8 @@ int main(int argc, char **argv) /* Set ulimit for open files */ set_ulimit(); } + /* init lttng_fd tracking must be done after set_ulimit. */ + lttng_fd_init(); ret = set_consumer_sockets(&ustconsumer64_data, rundir); if (ret < 0) { @@ -4362,21 +5948,18 @@ int main(int argc, char **argv) } /* Setup the kernel pipe for waking up the kernel thread */ - if ((ret = create_kernel_poll_pipe()) < 0) { + if ((ret = utils_create_pipe_cloexec(kernel_poll_pipe)) < 0) { goto exit; } /* Setup the thread apps communication pipe. */ - if ((ret = create_apps_cmd_pipe()) < 0) { + if ((ret = utils_create_pipe_cloexec(apps_cmd_pipe)) < 0) { goto exit; } /* Init UST command queue. */ cds_wfq_init(&ust_cmd_queue.queue); - /* Init UST app hash table */ - ust_app_ht_alloc(); - /* * Get session list pointer. This pointer MUST NOT be free(). This list is * statically declared in session.c @@ -4386,11 +5969,43 @@ int main(int argc, char **argv) /* Set up max poll set size */ lttng_poll_set_max_size(); + /* + * Set network sequence index to 1 for streams to match a relayd socket on + * the consumer side. + */ + uatomic_set(&relayd_net_seq_idx, 1); + + /* Init all health thread counters. */ + health_init(&health_thread_cmd); + health_init(&health_thread_kernel); + health_init(&health_thread_app_manage); + health_init(&health_thread_app_reg); + + /* + * Init health counters of the consumer thread. We do a quick hack here to + * the state of the consumer health is fine even if the thread is not + * started. This is simply to ease our life and has no cost what so ever. + */ + health_init(&kconsumer_data.health); + health_poll_update(&kconsumer_data.health); + health_init(&ustconsumer32_data.health); + health_poll_update(&ustconsumer32_data.health); + health_init(&ustconsumer64_data.health); + health_poll_update(&ustconsumer64_data.health); + + /* Create thread to manage the client socket */ + ret = pthread_create(&health_thread, NULL, + thread_manage_health, (void *) NULL); + if (ret != 0) { + PERROR("pthread_create health"); + goto exit_health; + } + /* Create thread to manage the client socket */ ret = pthread_create(&client_thread, NULL, thread_manage_clients, (void *) NULL); if (ret != 0) { - perror("pthread_create clients"); + PERROR("pthread_create clients"); goto exit_client; } @@ -4398,7 +6013,7 @@ int main(int argc, char **argv) ret = pthread_create(&dispatch_thread, NULL, thread_dispatch_ust_registration, (void *) NULL); if (ret != 0) { - perror("pthread_create dispatch"); + PERROR("pthread_create dispatch"); goto exit_dispatch; } @@ -4406,7 +6021,7 @@ int main(int argc, char **argv) ret = pthread_create(®_apps_thread, NULL, thread_registration_apps, (void *) NULL); if (ret != 0) { - perror("pthread_create registration"); + PERROR("pthread_create registration"); goto exit_reg_apps; } @@ -4414,7 +6029,7 @@ int main(int argc, char **argv) ret = pthread_create(&apps_thread, NULL, thread_manage_apps, (void *) NULL); if (ret != 0) { - perror("pthread_create apps"); + PERROR("pthread_create apps"); goto exit_apps; } @@ -4422,51 +6037,52 @@ int main(int argc, char **argv) ret = pthread_create(&kernel_thread, NULL, thread_manage_kernel, (void *) NULL); if (ret != 0) { - perror("pthread_create kernel"); + PERROR("pthread_create kernel"); goto exit_kernel; } ret = pthread_join(kernel_thread, &status); if (ret != 0) { - perror("pthread_join"); + PERROR("pthread_join"); goto error; /* join error, exit without cleanup */ } exit_kernel: ret = pthread_join(apps_thread, &status); if (ret != 0) { - perror("pthread_join"); + PERROR("pthread_join"); goto error; /* join error, exit without cleanup */ } exit_apps: ret = pthread_join(reg_apps_thread, &status); if (ret != 0) { - perror("pthread_join"); + PERROR("pthread_join"); goto error; /* join error, exit without cleanup */ } exit_reg_apps: ret = pthread_join(dispatch_thread, &status); if (ret != 0) { - perror("pthread_join"); + PERROR("pthread_join"); goto error; /* join error, exit without cleanup */ } exit_dispatch: ret = pthread_join(client_thread, &status); if (ret != 0) { - perror("pthread_join"); + PERROR("pthread_join"); goto error; /* join error, exit without cleanup */ } ret = join_consumer_thread(&kconsumer_data); if (ret != 0) { - perror("join_consumer"); + PERROR("join_consumer"); goto error; /* join error, exit without cleanup */ } exit_client: +exit_health: exit: /* * cleanup() is called when no other thread is running.