X-Git-Url: https://git.lttng.org/?p=lttng-tools.git;a=blobdiff_plain;f=src%2Fbin%2Flttng-sessiond%2Fmain.c;h=a54c0bf8df0a7c76d1bd39f4a4b99273501cb214;hp=a9bb2157e16b4089fa309bfb05c953bbf574780a;hb=0658924ecc1b36649ec467474bb82db8032a80c0;hpb=b816b2a6741cd41512a7102b24f278f8e297b864 diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c index a9bb2157e..43f750a82 100644 --- a/src/bin/lttng-sessiond/main.c +++ b/src/bin/lttng-sessiond/main.c @@ -1,6 +1,7 @@ /* * Copyright (C) 2011 - David Goulet * Mathieu Desnoyers + * 2013 - Jérémie Galarneau * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License, version 2 only, @@ -17,15 +18,17 @@ */ #define _GNU_SOURCE +#define _LGPL_SOURCE #include #include #include +#include #include -#include #include #include #include #include +#include #include #include #include @@ -38,15 +41,18 @@ #include #include -#include #include +#include #include #include #include #include #include +#include +#include #include "lttng-sessiond.h" +#include "buffer-registry.h" #include "channel.h" #include "cmd.h" #include "consumer.h" @@ -60,28 +66,38 @@ #include "ust-consumer.h" #include "utils.h" #include "fd-limit.h" -#include "filter.h" -#include "health.h" +#include "health-sessiond.h" +#include "testpoint.h" +#include "ust-thread.h" +#include "agent-thread.h" +#include "save.h" +#include "load-session-thread.h" +#include "syscall.h" +#include "agent.h" #define CONSUMERD_FILE "lttng-consumerd" -/* Const values */ -const char default_home_dir[] = DEFAULT_HOME_DIR; -const char default_tracing_group[] = DEFAULT_TRACING_GROUP; -const char default_ust_sock_dir[] = DEFAULT_UST_SOCK_DIR; -const char default_global_apps_pipe[] = DEFAULT_GLOBAL_APPS_PIPE; - const char *progname; -const char *opt_tracing_group; +static const char *tracing_group_name = DEFAULT_TRACING_GROUP; +static int tracing_group_name_override; +static char *opt_pidfile; static int opt_sig_parent; static int opt_verbose_consumer; -static int opt_daemon; +static int opt_daemon, opt_background; static int opt_no_kernel; -static int is_root; /* Set to 1 if the daemon is running as root */ +static char *opt_load_session_path; static pid_t ppid; /* Parent PID for --sig-parent option */ +static pid_t child_ppid; /* Internal parent PID use with daemonize. */ static char *rundir; +static int lockfile_fd = -1; + +/* Set to 1 when a SIGUSR1 signal is received. */ +static int recv_child_signal; -/* Consumer daemon specific control data */ +/* + * Consumer daemon specific control data. Every value not initialized here is + * set to 0 by the static definition. + */ static struct consumer_data kconsumer_data = { .type = LTTNG_CONSUMER_KERNEL, .err_unix_sock_path = DEFAULT_KCONSUMERD_ERR_SOCK_PATH, @@ -90,6 +106,8 @@ static struct consumer_data kconsumer_data = { .cmd_sock = -1, .pid_mutex = PTHREAD_MUTEX_INITIALIZER, .lock = PTHREAD_MUTEX_INITIALIZER, + .cond = PTHREAD_COND_INITIALIZER, + .cond_mutex = PTHREAD_MUTEX_INITIALIZER, }; static struct consumer_data ustconsumer64_data = { .type = LTTNG_CONSUMER64_UST, @@ -99,6 +117,8 @@ static struct consumer_data ustconsumer64_data = { .cmd_sock = -1, .pid_mutex = PTHREAD_MUTEX_INITIALIZER, .lock = PTHREAD_MUTEX_INITIALIZER, + .cond = PTHREAD_COND_INITIALIZER, + .cond_mutex = PTHREAD_MUTEX_INITIALIZER, }; static struct consumer_data ustconsumer32_data = { .type = LTTNG_CONSUMER32_UST, @@ -108,8 +128,46 @@ static struct consumer_data ustconsumer32_data = { .cmd_sock = -1, .pid_mutex = PTHREAD_MUTEX_INITIALIZER, .lock = PTHREAD_MUTEX_INITIALIZER, + .cond = PTHREAD_COND_INITIALIZER, + .cond_mutex = PTHREAD_MUTEX_INITIALIZER, }; +/* Command line options */ +static const struct option long_options[] = { + { "client-sock", required_argument, 0, 'c' }, + { "apps-sock", required_argument, 0, 'a' }, + { "kconsumerd-cmd-sock", required_argument, 0, '\0' }, + { "kconsumerd-err-sock", required_argument, 0, '\0' }, + { "ustconsumerd32-cmd-sock", required_argument, 0, '\0' }, + { "ustconsumerd32-err-sock", required_argument, 0, '\0' }, + { "ustconsumerd64-cmd-sock", required_argument, 0, '\0' }, + { "ustconsumerd64-err-sock", required_argument, 0, '\0' }, + { "consumerd32-path", required_argument, 0, '\0' }, + { "consumerd32-libdir", required_argument, 0, '\0' }, + { "consumerd64-path", required_argument, 0, '\0' }, + { "consumerd64-libdir", required_argument, 0, '\0' }, + { "daemonize", no_argument, 0, 'd' }, + { "background", no_argument, 0, 'b' }, + { "sig-parent", no_argument, 0, 'S' }, + { "help", no_argument, 0, 'h' }, + { "group", required_argument, 0, 'g' }, + { "version", no_argument, 0, 'V' }, + { "quiet", no_argument, 0, 'q' }, + { "verbose", no_argument, 0, 'v' }, + { "verbose-consumer", no_argument, 0, '\0' }, + { "no-kernel", no_argument, 0, '\0' }, + { "pidfile", required_argument, 0, 'p' }, + { "agent-tcp-port", required_argument, 0, '\0' }, + { "config", required_argument, 0, 'f' }, + { "load", required_argument, 0, 'l' }, + { "kmod-probes", required_argument, 0, '\0' }, + { "extra-kmod-probes", required_argument, 0, '\0' }, + { NULL, 0, 0, 0 } +}; + +/* Command line options to ignore from configuration file */ +static const char *config_ignore_options[] = { "help", "version", "config" }; + /* Shared between threads */ static int dispatch_thread_exit; @@ -133,6 +191,7 @@ static int kernel_poll_pipe[2] = { -1, -1 }; * for all threads when receiving an event on the pipe. */ static int thread_quit_pipe[2] = { -1, -1 }; +static int ht_cleanup_quit_pipe[2] = { -1, -1 }; /* * This pipe is used to inform the thread managing application communication @@ -140,20 +199,28 @@ static int thread_quit_pipe[2] = { -1, -1 }; */ static int apps_cmd_pipe[2] = { -1, -1 }; +int apps_cmd_notify_pipe[2] = { -1, -1 }; + /* Pthread, Mutexes and Semaphores */ static pthread_t apps_thread; +static pthread_t apps_notify_thread; static pthread_t reg_apps_thread; static pthread_t client_thread; static pthread_t kernel_thread; static pthread_t dispatch_thread; static pthread_t health_thread; +static pthread_t ht_cleanup_thread; +static pthread_t agent_reg_thread; +static pthread_t load_session_thread; /* * UST registration command queue. This queue is tied with a futex and uses a N * wakers / 1 waiter implemented and detailed in futex.c/.h * - * The thread_manage_apps and thread_dispatch_ust_registration interact with - * this queue and the wait/wake scheme. + * The thread_registration_apps and thread_dispatch_ust_registration uses this + * queue along with the wait/wake scheme. The thread_manage_apps receives down + * the line new application socket and monitors it for any I/O error or clean + * close that triggers an unregistration of the application. */ static struct ust_cmd_queue ust_cmd_queue; @@ -176,6 +243,10 @@ static const char *consumerd32_bin = CONFIG_CONSUMERD32_BIN; static const char *consumerd64_bin = CONFIG_CONSUMERD64_BIN; static const char *consumerd32_libdir = CONFIG_CONSUMERD32_LIBDIR; static const char *consumerd64_libdir = CONFIG_CONSUMERD64_LIBDIR; +static int consumerd32_bin_override; +static int consumerd64_bin_override; +static int consumerd32_libdir_override; +static int consumerd64_libdir_override; static const char *module_proc_lttng = "/proc/lttng"; @@ -213,11 +284,62 @@ enum consumerd_state { static enum consumerd_state ust_consumerd_state; static enum consumerd_state kernel_consumerd_state; -/* Used for the health monitoring of the session daemon. See health.h */ -struct health_state health_thread_cmd; -struct health_state health_thread_app_manage; -struct health_state health_thread_app_reg; -struct health_state health_thread_kernel; +/* + * Socket timeout for receiving and sending in seconds. + */ +static int app_socket_timeout; + +/* Set in main() with the current page size. */ +long page_size; + +/* Application health monitoring */ +struct health_app *health_sessiond; + +/* Agent TCP port for registration. Used by the agent thread. */ +unsigned int agent_tcp_port = DEFAULT_AGENT_TCP_PORT; + +/* Am I root or not. */ +int is_root; /* Set to 1 if the daemon is running as root */ + +const char * const config_section_name = "sessiond"; + +/* Load session thread information to operate. */ +struct load_session_thread_data *load_info; + +/* Global hash tables */ +struct lttng_ht *agent_apps_ht_by_sock = NULL; + +/* + * Whether sessiond is ready for commands/health check requests. + * NR_LTTNG_SESSIOND_READY must match the number of calls to + * sessiond_notify_ready(). + */ +#define NR_LTTNG_SESSIOND_READY 3 +int lttng_sessiond_ready = NR_LTTNG_SESSIOND_READY; + +/* Notify parents that we are ready for cmd and health check */ +LTTNG_HIDDEN +void sessiond_notify_ready(void) +{ + if (uatomic_sub_return(<tng_sessiond_ready, 1) == 0) { + /* + * Notify parent pid that we are ready to accept command + * for client side. This ppid is the one from the + * external process that spawned us. + */ + if (opt_sig_parent) { + kill(ppid, SIGUSR1); + } + + /* + * Notify the parent of the fork() process that we are + * ready. + */ + if (opt_daemon || opt_background) { + kill(child_ppid, SIGUSR1); + } + } +} static void setup_consumerd_path(void) @@ -250,36 +372,31 @@ void setup_consumerd_path(void) /* * runtime env. var. overrides the build default. */ - bin = getenv("LTTNG_CONSUMERD32_BIN"); + bin = lttng_secure_getenv("LTTNG_CONSUMERD32_BIN"); if (bin) { consumerd32_bin = bin; } - bin = getenv("LTTNG_CONSUMERD64_BIN"); + bin = lttng_secure_getenv("LTTNG_CONSUMERD64_BIN"); if (bin) { consumerd64_bin = bin; } - libdir = getenv("LTTNG_CONSUMERD32_LIBDIR"); + libdir = lttng_secure_getenv("LTTNG_CONSUMERD32_LIBDIR"); if (libdir) { consumerd32_libdir = libdir; } - libdir = getenv("LTTNG_CONSUMERD64_LIBDIR"); + libdir = lttng_secure_getenv("LTTNG_CONSUMERD64_LIBDIR"); if (libdir) { consumerd64_libdir = libdir; } } -/* - * Create a poll set with O_CLOEXEC and add the thread quit pipe to the set. - */ -static int create_thread_poll_set(struct lttng_poll_event *events, - unsigned int size) +static +int __sessiond_set_thread_pollset(struct lttng_poll_event *events, size_t size, + int *a_pipe) { int ret; - if (events == NULL || size == 0) { - ret = -1; - goto error; - } + assert(events); ret = lttng_poll_create(events, size, LTTNG_CLOEXEC); if (ret < 0) { @@ -287,7 +404,7 @@ static int create_thread_poll_set(struct lttng_poll_event *events, } /* Add quit pipe */ - ret = lttng_poll_add(events, thread_quit_pipe[0], LPOLLIN); + ret = lttng_poll_add(events, a_pipe[0], LPOLLIN | LPOLLERR); if (ret < 0) { goto error; } @@ -299,36 +416,52 @@ error: } /* - * Check if the thread quit pipe was triggered. - * - * Return 1 if it was triggered else 0; + * Create a poll set with O_CLOEXEC and add the thread quit pipe to the set. + */ +int sessiond_set_thread_pollset(struct lttng_poll_event *events, size_t size) +{ + return __sessiond_set_thread_pollset(events, size, thread_quit_pipe); +} + +/* + * Create a poll set with O_CLOEXEC and add the thread quit pipe to the set. */ -static int check_thread_quit_pipe(int fd, uint32_t events) +int sessiond_set_ht_cleanup_thread_pollset(struct lttng_poll_event *events, + size_t size) { - if (fd == thread_quit_pipe[0] && (events & LPOLLIN)) { + return __sessiond_set_thread_pollset(events, size, + ht_cleanup_quit_pipe); +} + +static +int __sessiond_check_thread_quit_pipe(int fd, uint32_t events, int a_pipe) +{ + if (fd == a_pipe && (events & LPOLLIN)) { return 1; } - return 0; } /* - * Return group ID of the tracing group or -1 if not found. + * Check if the thread quit pipe was triggered. + * + * Return 1 if it was triggered else 0; */ -static gid_t allowed_group(void) +int sessiond_check_thread_quit_pipe(int fd, uint32_t events) { - struct group *grp; + return __sessiond_check_thread_quit_pipe(fd, events, + thread_quit_pipe[0]); +} - if (opt_tracing_group) { - grp = getgrnam(opt_tracing_group); - } else { - grp = getgrnam(default_tracing_group); - } - if (!grp) { - return -1; - } else { - return grp->gr_gid; - } +/* + * Check if the ht_cleanup thread quit pipe was triggered. + * + * Return 1 if it was triggered else 0; + */ +int sessiond_check_ht_cleanup_quit(int fd, uint32_t events) +{ + return __sessiond_check_thread_quit_pipe(fd, events, + ht_cleanup_quit_pipe[0]); } /* @@ -336,18 +469,18 @@ static gid_t allowed_group(void) * * Return -1 on error or 0 if all pipes are created. */ -static int init_thread_quit_pipe(void) +static int __init_thread_quit_pipe(int *a_pipe) { int ret, i; - ret = pipe(thread_quit_pipe); + ret = pipe(a_pipe); if (ret < 0) { PERROR("thread quit pipe"); goto error; } for (i = 0; i < 2; i++) { - ret = fcntl(thread_quit_pipe[i], F_SETFD, FD_CLOEXEC); + ret = fcntl(a_pipe[i], F_SETFD, FD_CLOEXEC); if (ret < 0) { PERROR("fcntl"); goto error; @@ -358,6 +491,16 @@ error: return ret; } +static int init_thread_quit_pipe(void) +{ + return __init_thread_quit_pipe(thread_quit_pipe); +} + +static int init_ht_cleanup_quit_pipe(void) +{ + return __init_thread_quit_pipe(ht_cleanup_quit_pipe); +} + /* * Stop all threads by closing the thread quit pipe. */ @@ -378,31 +521,151 @@ static void stop_threads(void) } /* - * Cleanup the daemon + * Close every consumer sockets. */ -static void cleanup(void) +static void close_consumer_sockets(void) { int ret; - char *cmd; - struct ltt_session *sess, *stmp; - DBG("Cleaning up"); + if (kconsumer_data.err_sock >= 0) { + ret = close(kconsumer_data.err_sock); + if (ret < 0) { + PERROR("kernel consumer err_sock close"); + } + } + if (ustconsumer32_data.err_sock >= 0) { + ret = close(ustconsumer32_data.err_sock); + if (ret < 0) { + PERROR("UST consumerd32 err_sock close"); + } + } + if (ustconsumer64_data.err_sock >= 0) { + ret = close(ustconsumer64_data.err_sock); + if (ret < 0) { + PERROR("UST consumerd64 err_sock close"); + } + } + if (kconsumer_data.cmd_sock >= 0) { + ret = close(kconsumer_data.cmd_sock); + if (ret < 0) { + PERROR("kernel consumer cmd_sock close"); + } + } + if (ustconsumer32_data.cmd_sock >= 0) { + ret = close(ustconsumer32_data.cmd_sock); + if (ret < 0) { + PERROR("UST consumerd32 cmd_sock close"); + } + } + if (ustconsumer64_data.cmd_sock >= 0) { + ret = close(ustconsumer64_data.cmd_sock); + if (ret < 0) { + PERROR("UST consumerd64 cmd_sock close"); + } + } +} - /* First thing first, stop all threads */ - utils_close_pipe(thread_quit_pipe); +/* + * Generate the full lock file path using the rundir. + * + * Return the snprintf() return value thus a negative value is an error. + */ +static int generate_lock_file_path(char *path, size_t len) +{ + int ret; + + assert(path); + assert(rundir); - DBG("Removing %s directory", rundir); - ret = asprintf(&cmd, "rm -rf %s", rundir); + /* Build lockfile path from rundir. */ + ret = snprintf(path, len, "%s/" DEFAULT_LTTNG_SESSIOND_LOCKFILE, rundir); if (ret < 0) { - ERR("asprintf failed. Something is really wrong!"); + PERROR("snprintf lockfile path"); } - /* Remove lttng run directory */ - ret = system(cmd); - if (ret < 0) { - ERR("Unable to clean %s", rundir); + return ret; +} + +/* + * Cleanup the session daemon's data structures. + */ +static void sessiond_cleanup(void) +{ + int ret; + struct ltt_session *sess, *stmp; + char path[PATH_MAX]; + + DBG("Cleanup sessiond"); + + /* + * Close the thread quit pipe. It has already done its job, + * since we are now called. + */ + utils_close_pipe(thread_quit_pipe); + + /* + * If opt_pidfile is undefined, the default file will be wiped when + * removing the rundir. + */ + if (opt_pidfile) { + ret = remove(opt_pidfile); + if (ret < 0) { + PERROR("remove pidfile %s", opt_pidfile); + } } - free(cmd); + + DBG("Removing sessiond and consumerd content of directory %s", rundir); + + /* sessiond */ + snprintf(path, PATH_MAX, + "%s/%s", + rundir, DEFAULT_LTTNG_SESSIOND_PIDFILE); + DBG("Removing %s", path); + (void) unlink(path); + + snprintf(path, PATH_MAX, "%s/%s", rundir, + DEFAULT_LTTNG_SESSIOND_AGENTPORT_FILE); + DBG("Removing %s", path); + (void) unlink(path); + + /* kconsumerd */ + snprintf(path, PATH_MAX, + DEFAULT_KCONSUMERD_ERR_SOCK_PATH, + rundir); + DBG("Removing %s", path); + (void) unlink(path); + + snprintf(path, PATH_MAX, + DEFAULT_KCONSUMERD_PATH, + rundir); + DBG("Removing directory %s", path); + (void) rmdir(path); + + /* ust consumerd 32 */ + snprintf(path, PATH_MAX, + DEFAULT_USTCONSUMERD32_ERR_SOCK_PATH, + rundir); + DBG("Removing %s", path); + (void) unlink(path); + + snprintf(path, PATH_MAX, + DEFAULT_USTCONSUMERD32_PATH, + rundir); + DBG("Removing directory %s", path); + (void) rmdir(path); + + /* ust consumerd 64 */ + snprintf(path, PATH_MAX, + DEFAULT_USTCONSUMERD64_ERR_SOCK_PATH, + rundir); + DBG("Removing %s", path); + (void) unlink(path); + + snprintf(path, PATH_MAX, + DEFAULT_USTCONSUMERD64_PATH, + rundir); + DBG("Removing directory %s", path); + (void) rmdir(path); DBG("Cleaning up all sessions"); @@ -417,8 +680,12 @@ static void cleanup(void) } } + DBG("Cleaning up all agent apps"); + agent_app_ht_clean(); + DBG("Closing all UST sockets"); ust_app_clean_list(); + buffer_reg_destroy_registries(); if (is_root && !opt_no_kernel) { DBG2("Closing kernel fd"); @@ -430,10 +697,77 @@ static void cleanup(void) } DBG("Unloading kernel modules"); modprobe_remove_lttng_all(); + free(syscall_table); } - utils_close_pipe(kernel_poll_pipe); - utils_close_pipe(apps_cmd_pipe); + close_consumer_sockets(); + + if (load_info) { + load_session_destroy_data(load_info); + free(load_info); + } + + /* + * Cleanup lock file by deleting it and finaly closing it which will + * release the file system lock. + */ + if (lockfile_fd >= 0) { + char lockfile_path[PATH_MAX]; + + ret = generate_lock_file_path(lockfile_path, + sizeof(lockfile_path)); + if (ret > 0) { + ret = remove(lockfile_path); + if (ret < 0) { + PERROR("remove lock file"); + } + ret = close(lockfile_fd); + if (ret < 0) { + PERROR("close lock file"); + } + } + } + + /* + * We do NOT rmdir rundir because there are other processes + * using it, for instance lttng-relayd, which can start in + * parallel with this teardown. + */ + + free(rundir); +} + +/* + * Cleanup the daemon's option data structures. + */ +static void sessiond_cleanup_options(void) +{ + DBG("Cleaning up options"); + + /* + * If the override option is set, the pointer points to a *non* const + * thus freeing it even though the variable type is set to const. + */ + if (tracing_group_name_override) { + free((void *) tracing_group_name); + } + if (consumerd32_bin_override) { + free((void *) consumerd32_bin); + } + if (consumerd64_bin_override) { + free((void *) consumerd64_bin); + } + if (consumerd32_libdir_override) { + free((void *) consumerd32_libdir); + } + if (consumerd64_libdir_override) { + free((void *) consumerd64_libdir); + } + + free(opt_pidfile); + free(opt_load_session_path); + free(kmod_probes_list); + free(kmod_extra_probes_list); /* */ DBG("%c[%d;%dm*** assert failed :-) *** ==> %c[%dm%c[%d;%dm" @@ -450,7 +784,7 @@ static void cleanup(void) static int send_unix_sock(int sock, void *buf, size_t len) { /* Check valid length */ - if (len <= 0) { + if (len == 0) { return -1; } @@ -598,40 +932,47 @@ static int update_kernel_stream(struct consumer_data *consumer_data, int fd) } ksess = session->kernel_session; - cds_list_for_each_entry(channel, &ksess->channel_list.head, list) { - if (channel->fd == fd) { - DBG("Channel found, updating kernel streams"); - ret = kernel_open_channel_stream(channel); + cds_list_for_each_entry(channel, + &ksess->channel_list.head, list) { + struct lttng_ht_iter iter; + struct consumer_socket *socket; + + if (channel->fd != fd) { + continue; + } + DBG("Channel found, updating kernel streams"); + ret = kernel_open_channel_stream(channel); + if (ret < 0) { + goto error; + } + /* Update the stream global counter */ + ksess->stream_count_global += ret; + + /* + * Have we already sent fds to the consumer? If yes, it + * means that tracing is started so it is safe to send + * our updated stream fds. + */ + if (ksess->consumer_fds_sent != 1 + || ksess->consumer == NULL) { + ret = -1; + goto error; + } + + rcu_read_lock(); + cds_lfht_for_each_entry(ksess->consumer->socks->ht, + &iter.iter, socket, node.node) { + pthread_mutex_lock(socket->lock); + ret = kernel_consumer_send_channel_stream(socket, + channel, ksess, + session->output_traces ? 1 : 0); + pthread_mutex_unlock(socket->lock); if (ret < 0) { + rcu_read_unlock(); goto error; } - - /* - * Have we already sent fds to the consumer? If yes, it means - * that tracing is started so it is safe to send our updated - * stream fds. - */ - if (ksess->consumer_fds_sent == 1 && ksess->consumer != NULL) { - struct lttng_ht_iter iter; - struct consumer_socket *socket; - - - cds_lfht_for_each_entry(ksess->consumer->socks->ht, - &iter.iter, socket, node.node) { - /* Code flow error */ - assert(socket->fd >= 0); - - pthread_mutex_lock(socket->lock); - ret = kernel_consumer_send_channel_stream(socket->fd, - channel, ksess); - pthread_mutex_unlock(socket->lock); - if (ret < 0) { - goto error; - } - } - } - goto error; } + rcu_read_unlock(); } session_unlock(session); } @@ -645,24 +986,47 @@ error: } /* - * For each tracing session, update newly registered apps. + * For each tracing session, update newly registered apps. The session list + * lock MUST be acquired before calling this. */ static void update_ust_app(int app_sock) { struct ltt_session *sess, *stmp; - session_lock_list(); + /* Consumer is in an ERROR state. Stop any application update. */ + if (uatomic_read(&ust_consumerd_state) == CONSUMER_ERROR) { + /* Stop the update process since the consumer is dead. */ + return; + } /* For all tracing session(s) */ cds_list_for_each_entry_safe(sess, stmp, &session_list_ptr->head, list) { + struct ust_app *app; + session_lock(sess); - if (sess->ust_session) { - ust_app_global_update(sess->ust_session, app_sock); + if (!sess->ust_session) { + goto unlock_session; + } + + rcu_read_lock(); + assert(app_sock >= 0); + app = ust_app_find_by_sock(app_sock); + if (app == NULL) { + /* + * Application can be unregistered before so + * this is possible hence simply stopping the + * update. + */ + DBG3("UST app update failed to find app sock %d", + app_sock); + goto unlock_rcu; } + ust_app_global_update(sess->ust_session, app); + unlock_rcu: + rcu_read_unlock(); + unlock_session: session_unlock(sess); } - - session_unlock_list(); } /* @@ -678,30 +1042,44 @@ static void *thread_manage_kernel(void *data) char tmp; struct lttng_poll_event events; - DBG("Thread manage kernel started"); + DBG("[thread] Thread manage kernel started"); - health_code_update(&health_thread_kernel); + health_register(health_sessiond, HEALTH_SESSIOND_TYPE_KERNEL); - ret = create_thread_poll_set(&events, 2); - if (ret < 0) { - goto error_poll_create; + /* + * This first step of the while is to clean this structure which could free + * non NULL pointers so initialize it before the loop. + */ + lttng_poll_init(&events); + + if (testpoint(sessiond_thread_manage_kernel)) { + goto error_testpoint; } - ret = lttng_poll_add(&events, kernel_poll_pipe[0], LPOLLIN); - if (ret < 0) { - goto error; + health_code_update(); + + if (testpoint(sessiond_thread_manage_kernel_before_loop)) { + goto error_testpoint; } while (1) { - health_code_update(&health_thread_kernel); + health_code_update(); if (update_poll_flag == 1) { - /* - * Reset number of fd in the poll set. Always 2 since there is the thread - * quit pipe and the kernel pipe. - */ - events.nb_fd = 2; + /* Clean events object. We are about to populate it again. */ + lttng_poll_clean(&events); + + ret = sessiond_set_thread_pollset(&events, 2); + if (ret < 0) { + goto error_poll_create; + } + + ret = lttng_poll_add(&events, kernel_poll_pipe[0], LPOLLIN); + if (ret < 0) { + goto error; + } + /* This will add the available kernel channel if any. */ ret = update_kernel_poll(&events); if (ret < 0) { goto error; @@ -709,18 +1087,15 @@ static void *thread_manage_kernel(void *data) update_poll_flag = 0; } - nb_fd = LTTNG_POLL_GETNB(&events); - - DBG("Thread kernel polling on %d fds", nb_fd); - - /* Zeroed the poll events */ - lttng_poll_reset(&events); + DBG("Thread kernel polling"); /* Poll infinite value of time */ restart: - health_poll_update(&health_thread_kernel); + health_poll_entry(); ret = lttng_poll_wait(&events, -1); - health_poll_update(&health_thread_kernel); + DBG("Thread kernel return from poll on %d fds", + LTTNG_POLL_GETNB(&events)); + health_poll_exit(); if (ret < 0) { /* * Restart interrupted system call. @@ -736,15 +1111,22 @@ static void *thread_manage_kernel(void *data) continue; } + nb_fd = ret; + for (i = 0; i < nb_fd; i++) { /* Fetch once the poll data */ revents = LTTNG_POLL_GETEV(&events, i); pollfd = LTTNG_POLL_GETFD(&events, i); - health_code_update(&health_thread_kernel); + health_code_update(); + + if (!revents) { + /* No activity for this FD (poll implementation). */ + continue; + } /* Thread quit pipe has been closed. Killing thread. */ - ret = check_thread_quit_pipe(pollfd, revents); + ret = sessiond_check_thread_quit_pipe(pollfd, revents); if (ret) { err = 0; goto exit; @@ -752,7 +1134,12 @@ static void *thread_manage_kernel(void *data) /* Check for data on kernel pipe */ if (pollfd == kernel_poll_pipe[0] && (revents & LPOLLIN)) { - ret = read(kernel_poll_pipe[0], &tmp, 1); + (void) lttng_read(kernel_poll_pipe[0], + &tmp, 1); + /* + * Ret value is useless here, if this pipe gets any actions an + * update is required anyway. + */ update_poll_flag = 1; continue; } else { @@ -779,21 +1166,49 @@ exit: error: lttng_poll_clean(&events); error_poll_create: +error_testpoint: + utils_close_pipe(kernel_poll_pipe); + kernel_poll_pipe[0] = kernel_poll_pipe[1] = -1; if (err) { - health_error(&health_thread_kernel); + health_error(); ERR("Health error occurred in %s", __func__); + WARN("Kernel thread died unexpectedly. " + "Kernel tracing can continue but CPU hotplug is disabled."); } - health_exit(&health_thread_kernel); + health_unregister(health_sessiond); DBG("Kernel thread dying"); return NULL; } +/* + * Signal pthread condition of the consumer data that the thread. + */ +static void signal_consumer_condition(struct consumer_data *data, int state) +{ + pthread_mutex_lock(&data->cond_mutex); + + /* + * The state is set before signaling. It can be any value, it's the waiter + * job to correctly interpret this condition variable associated to the + * consumer pthread_cond. + * + * A value of 0 means that the corresponding thread of the consumer data + * was not started. 1 indicates that the thread has started and is ready + * for action. A negative value means that there was an error during the + * thread bootstrap. + */ + data->consumer_thread_is_ready = state; + (void) pthread_cond_signal(&data->cond); + + pthread_mutex_unlock(&data->cond_mutex); +} + /* * This thread manage the consumer error sent back to the session daemon. */ static void *thread_manage_consumer(void *data) { - int sock = -1, i, ret, pollfd, err = -1; + int sock = -1, i, ret, pollfd, err = -1, should_quit = 0; uint32_t revents, nb_fd; enum lttcomm_return_code code; struct lttng_poll_event events; @@ -801,36 +1216,44 @@ static void *thread_manage_consumer(void *data) DBG("[thread] Manage consumer started"); - health_code_update(&consumer_data->health); + rcu_register_thread(); + rcu_thread_online(); + + health_register(health_sessiond, HEALTH_SESSIOND_TYPE_CONSUMER); - ret = lttcomm_listen_unix_sock(consumer_data->err_sock); - if (ret < 0) { - goto error_listen; - } + health_code_update(); /* - * Pass 2 as size here for the thread quit pipe and kconsumerd_err_sock. - * Nothing more will be added to this poll set. + * Pass 3 as size here for the thread quit pipe, consumerd_err_sock and the + * metadata_sock. Nothing more will be added to this poll set. */ - ret = create_thread_poll_set(&events, 2); + ret = sessiond_set_thread_pollset(&events, 3); if (ret < 0) { goto error_poll; } + /* + * The error socket here is already in a listening state which was done + * just before spawning this thread to avoid a race between the consumer + * daemon exec trying to connect and the listen() call. + */ ret = lttng_poll_add(&events, consumer_data->err_sock, LPOLLIN | LPOLLRDHUP); if (ret < 0) { goto error; } - nb_fd = LTTNG_POLL_GETNB(&events); - - health_code_update(&consumer_data->health); + health_code_update(); - /* Inifinite blocking call, waiting for transmission */ + /* Infinite blocking call, waiting for transmission */ restart: - health_poll_update(&consumer_data->health); + health_poll_entry(); + + if (testpoint(sessiond_thread_manage_consumer)) { + goto error; + } + ret = lttng_poll_wait(&events, -1); - health_poll_update(&consumer_data->health); + health_poll_exit(); if (ret < 0) { /* * Restart interrupted system call. @@ -841,15 +1264,22 @@ restart: goto error; } + nb_fd = ret; + for (i = 0; i < nb_fd; i++) { /* Fetch once the poll data */ revents = LTTNG_POLL_GETEV(&events, i); pollfd = LTTNG_POLL_GETFD(&events, i); - health_code_update(&consumer_data->health); + health_code_update(); + + if (!revents) { + /* No activity for this FD (poll implementation). */ + continue; + } /* Thread quit pipe has been closed. Killing thread. */ - ret = check_thread_quit_pipe(pollfd, revents); + ret = sessiond_check_thread_quit_pipe(pollfd, revents); if (ret) { err = 0; goto exit; @@ -869,7 +1299,13 @@ restart: goto error; } - health_code_update(&consumer_data->health); + /* + * Set the CLOEXEC flag. Return code is useless because either way, the + * show must go on. + */ + (void) utils_set_fd_cloexec(sock); + + health_code_update(); DBG2("Receiving code from consumer err_sock"); @@ -880,93 +1316,149 @@ restart: goto error; } - health_code_update(&consumer_data->health); - - if (code == CONSUMERD_COMMAND_SOCK_READY) { + health_code_update(); + if (code == LTTCOMM_CONSUMERD_COMMAND_SOCK_READY) { + /* Connect both socket, command and metadata. */ consumer_data->cmd_sock = lttcomm_connect_unix_sock(consumer_data->cmd_unix_sock_path); - if (consumer_data->cmd_sock < 0) { - sem_post(&consumer_data->sem); - PERROR("consumer connect"); + consumer_data->metadata_fd = + lttcomm_connect_unix_sock(consumer_data->cmd_unix_sock_path); + if (consumer_data->cmd_sock < 0 + || consumer_data->metadata_fd < 0) { + PERROR("consumer connect cmd socket"); + /* On error, signal condition and quit. */ + signal_consumer_condition(consumer_data, -1); + goto error; + } + consumer_data->metadata_sock.fd_ptr = &consumer_data->metadata_fd; + /* Create metadata socket lock. */ + consumer_data->metadata_sock.lock = zmalloc(sizeof(pthread_mutex_t)); + if (consumer_data->metadata_sock.lock == NULL) { + PERROR("zmalloc pthread mutex"); + ret = -1; goto error; } - /* Signal condition to tell that the kconsumerd is ready */ - sem_post(&consumer_data->sem); - DBG("consumer command socket ready"); + pthread_mutex_init(consumer_data->metadata_sock.lock, NULL); + + signal_consumer_condition(consumer_data, 1); + DBG("Consumer command socket ready (fd: %d", consumer_data->cmd_sock); + DBG("Consumer metadata socket ready (fd: %d)", + consumer_data->metadata_fd); } else { ERR("consumer error when waiting for SOCK_READY : %s", lttcomm_get_readable_code(-code)); goto error; } - /* Remove the kconsumerd error sock since we've established a connexion */ + /* Remove the consumerd error sock since we've established a connexion */ ret = lttng_poll_del(&events, consumer_data->err_sock); if (ret < 0) { goto error; } + /* Add new accepted error socket. */ ret = lttng_poll_add(&events, sock, LPOLLIN | LPOLLRDHUP); if (ret < 0) { goto error; } - health_code_update(&consumer_data->health); + /* Add metadata socket that is successfully connected. */ + ret = lttng_poll_add(&events, consumer_data->metadata_fd, + LPOLLIN | LPOLLRDHUP); + if (ret < 0) { + goto error; + } - /* Update number of fd */ - nb_fd = LTTNG_POLL_GETNB(&events); + health_code_update(); - /* Inifinite blocking call, waiting for transmission */ + /* Infinite blocking call, waiting for transmission */ restart_poll: - health_poll_update(&consumer_data->health); - ret = lttng_poll_wait(&events, -1); - health_poll_update(&consumer_data->health); - if (ret < 0) { - /* - * Restart interrupted system call. - */ - if (errno == EINTR) { - goto restart_poll; - } - goto error; - } - - for (i = 0; i < nb_fd; i++) { - /* Fetch once the poll data */ - revents = LTTNG_POLL_GETEV(&events, i); - pollfd = LTTNG_POLL_GETFD(&events, i); - - health_code_update(&consumer_data->health); + while (1) { + health_code_update(); - /* Thread quit pipe has been closed. Killing thread. */ - ret = check_thread_quit_pipe(pollfd, revents); - if (ret) { + /* Exit the thread because the thread quit pipe has been triggered. */ + if (should_quit) { + /* Not a health error. */ err = 0; goto exit; } - /* Event on the kconsumerd socket */ - if (pollfd == sock) { - if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) { - ERR("consumer err socket second poll error"); - goto error; + health_poll_entry(); + ret = lttng_poll_wait(&events, -1); + health_poll_exit(); + if (ret < 0) { + /* + * Restart interrupted system call. + */ + if (errno == EINTR) { + goto restart_poll; } + goto error; } - } - health_code_update(&consumer_data->health); + nb_fd = ret; - /* Wait for any kconsumerd error */ - ret = lttcomm_recv_unix_sock(sock, &code, - sizeof(enum lttcomm_return_code)); - if (ret <= 0) { - ERR("consumer closed the command socket"); - goto error; - } + for (i = 0; i < nb_fd; i++) { + /* Fetch once the poll data */ + revents = LTTNG_POLL_GETEV(&events, i); + pollfd = LTTNG_POLL_GETFD(&events, i); + + health_code_update(); + + if (!revents) { + /* No activity for this FD (poll implementation). */ + continue; + } + + /* + * Thread quit pipe has been triggered, flag that we should stop + * but continue the current loop to handle potential data from + * consumer. + */ + should_quit = sessiond_check_thread_quit_pipe(pollfd, revents); + + if (pollfd == sock) { + /* Event on the consumerd socket */ + if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) { + ERR("consumer err socket second poll error"); + goto error; + } + health_code_update(); + /* Wait for any kconsumerd error */ + ret = lttcomm_recv_unix_sock(sock, &code, + sizeof(enum lttcomm_return_code)); + if (ret <= 0) { + ERR("consumer closed the command socket"); + goto error; + } + + ERR("consumer return code : %s", + lttcomm_get_readable_code(-code)); - ERR("consumer return code : %s", lttcomm_get_readable_code(-code)); + goto exit; + } else if (pollfd == consumer_data->metadata_fd) { + /* UST metadata requests */ + ret = ust_consumer_metadata_request( + &consumer_data->metadata_sock); + if (ret < 0) { + ERR("Handling metadata request"); + goto error; + } + } + /* No need for an else branch all FDs are tested prior. */ + } + health_code_update(); + } exit: error: + /* + * We lock here because we are about to close the sockets and some other + * thread might be using them so get exclusive access which will abort all + * other consumer command by other threads. + */ + pthread_mutex_lock(&consumer_data->lock); + /* Immediately set the consumerd state to stopped */ if (consumer_data->type == LTTNG_CONSUMER_KERNEL) { uatomic_set(&kernel_consumerd_state, CONSUMER_ERROR); @@ -983,12 +1475,21 @@ error: if (ret) { PERROR("close"); } + consumer_data->err_sock = -1; } if (consumer_data->cmd_sock >= 0) { ret = close(consumer_data->cmd_sock); if (ret) { PERROR("close"); } + consumer_data->cmd_sock = -1; + } + if (consumer_data->metadata_sock.fd_ptr && + *consumer_data->metadata_sock.fd_ptr >= 0) { + ret = close(*consumer_data->metadata_sock.fd_ptr); + if (ret) { + PERROR("close"); + } } if (sock >= 0) { ret = close(sock); @@ -1000,17 +1501,25 @@ error: unlink(consumer_data->err_unix_sock_path); unlink(consumer_data->cmd_unix_sock_path); consumer_data->pid = 0; + pthread_mutex_unlock(&consumer_data->lock); + /* Cleanup metadata socket mutex. */ + if (consumer_data->metadata_sock.lock) { + pthread_mutex_destroy(consumer_data->metadata_sock.lock); + free(consumer_data->metadata_sock.lock); + } lttng_poll_clean(&events); error_poll: -error_listen: if (err) { - health_error(&consumer_data->health); + health_error(); ERR("Health error occurred in %s", __func__); } - health_exit(&consumer_data->health); + health_unregister(health_sessiond); DBG("consumer thread cleanup completed"); + rcu_thread_offline(); + rcu_unregister_thread(); + return NULL; } @@ -1020,8 +1529,8 @@ error_listen: static void *thread_manage_apps(void *data) { int i, ret, pollfd, err = -1; + ssize_t size_ret; uint32_t revents, nb_fd; - struct ust_command ust_cmd; struct lttng_poll_event events; DBG("[thread] Manage application started"); @@ -1029,9 +1538,15 @@ static void *thread_manage_apps(void *data) rcu_register_thread(); rcu_thread_online(); - health_code_update(&health_thread_app_manage); + health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_MANAGE); + + if (testpoint(sessiond_thread_manage_apps)) { + goto error_testpoint; + } + + health_code_update(); - ret = create_thread_poll_set(&events, 2); + ret = sessiond_set_thread_pollset(&events, 2); if (ret < 0) { goto error_poll_create; } @@ -1041,21 +1556,22 @@ static void *thread_manage_apps(void *data) goto error; } - health_code_update(&health_thread_app_manage); - - while (1) { - /* Zeroed the events structure */ - lttng_poll_reset(&events); + if (testpoint(sessiond_thread_manage_apps_before_loop)) { + goto error; + } - nb_fd = LTTNG_POLL_GETNB(&events); + health_code_update(); - DBG("Apps thread polling on %d fds", nb_fd); + while (1) { + DBG("Apps thread polling"); /* Inifinite blocking call, waiting for transmission */ restart: - health_poll_update(&health_thread_app_manage); + health_poll_entry(); ret = lttng_poll_wait(&events, -1); - health_poll_update(&health_thread_app_manage); + DBG("Apps thread return from poll on %d fds", + LTTNG_POLL_GETNB(&events)); + health_poll_exit(); if (ret < 0) { /* * Restart interrupted system call. @@ -1066,15 +1582,22 @@ static void *thread_manage_apps(void *data) goto error; } + nb_fd = ret; + for (i = 0; i < nb_fd; i++) { /* Fetch once the poll data */ revents = LTTNG_POLL_GETEV(&events, i); pollfd = LTTNG_POLL_GETFD(&events, i); - health_code_update(&health_thread_app_manage); + health_code_update(); + + if (!revents) { + /* No activity for this FD (poll implementation). */ + continue; + } /* Thread quit pipe has been closed. Killing thread. */ - ret = check_thread_quit_pipe(pollfd, revents); + ret = sessiond_check_thread_quit_pipe(pollfd, revents); if (ret) { err = 0; goto exit; @@ -1086,66 +1609,29 @@ static void *thread_manage_apps(void *data) ERR("Apps command pipe error"); goto error; } else if (revents & LPOLLIN) { + int sock; + /* Empty pipe */ - ret = read(apps_cmd_pipe[0], &ust_cmd, sizeof(ust_cmd)); - if (ret < 0 || ret < sizeof(ust_cmd)) { + size_ret = lttng_read(apps_cmd_pipe[0], &sock, sizeof(sock)); + if (size_ret < sizeof(sock)) { PERROR("read apps cmd pipe"); goto error; } - health_code_update(&health_thread_app_manage); - - /* Register applicaton to the session daemon */ - ret = ust_app_register(&ust_cmd.reg_msg, - ust_cmd.sock); - if (ret == -ENOMEM) { - goto error; - } else if (ret < 0) { - break; - } - - health_code_update(&health_thread_app_manage); + health_code_update(); /* - * Validate UST version compatibility. + * We only monitor the error events of the socket. This + * thread does not handle any incoming data from UST + * (POLLIN). */ - ret = ust_app_validate_version(ust_cmd.sock); - if (ret >= 0) { - /* - * Add channel(s) and event(s) to newly registered apps - * from lttng global UST domain. - */ - update_ust_app(ust_cmd.sock); - } - - health_code_update(&health_thread_app_manage); - - ret = ust_app_register_done(ust_cmd.sock); + ret = lttng_poll_add(&events, sock, + LPOLLERR | LPOLLHUP | LPOLLRDHUP); if (ret < 0) { - /* - * If the registration is not possible, we simply - * unregister the apps and continue - */ - ust_app_unregister(ust_cmd.sock); - } else { - /* - * We just need here to monitor the close of the UST - * socket and poll set monitor those by default. - * Listen on POLLIN (even if we never expect any - * data) to ensure that hangup wakes us. - */ - ret = lttng_poll_add(&events, ust_cmd.sock, LPOLLIN); - if (ret < 0) { - goto error; - } - - DBG("Apps with sock %d added to poll set", - ust_cmd.sock); + goto error; } - health_code_update(&health_thread_app_manage); - - break; + DBG("Apps with sock %d added to poll set", sock); } } else { /* @@ -1161,11 +1647,10 @@ static void *thread_manage_apps(void *data) /* Socket closed on remote end. */ ust_app_unregister(pollfd); - break; } } - health_code_update(&health_thread_app_manage); + health_code_update(); } } @@ -1173,36 +1658,201 @@ exit: error: lttng_poll_clean(&events); error_poll_create: +error_testpoint: + utils_close_pipe(apps_cmd_pipe); + apps_cmd_pipe[0] = apps_cmd_pipe[1] = -1; + + /* + * We don't clean the UST app hash table here since already registered + * applications can still be controlled so let them be until the session + * daemon dies or the applications stop. + */ + if (err) { - health_error(&health_thread_app_manage); + health_error(); ERR("Health error occurred in %s", __func__); } - health_exit(&health_thread_app_manage); + health_unregister(health_sessiond); DBG("Application communication apps thread cleanup complete"); rcu_thread_offline(); rcu_unregister_thread(); return NULL; } +/* + * Send a socket to a thread This is called from the dispatch UST registration + * thread once all sockets are set for the application. + * + * The sock value can be invalid, we don't really care, the thread will handle + * it and make the necessary cleanup if so. + * + * On success, return 0 else a negative value being the errno message of the + * write(). + */ +static int send_socket_to_thread(int fd, int sock) +{ + ssize_t ret; + + /* + * It's possible that the FD is set as invalid with -1 concurrently just + * before calling this function being a shutdown state of the thread. + */ + if (fd < 0) { + ret = -EBADF; + goto error; + } + + ret = lttng_write(fd, &sock, sizeof(sock)); + if (ret < sizeof(sock)) { + PERROR("write apps pipe %d", fd); + if (ret < 0) { + ret = -errno; + } + goto error; + } + + /* All good. Don't send back the write positive ret value. */ + ret = 0; +error: + return (int) ret; +} + +/* + * Sanitize the wait queue of the dispatch registration thread meaning removing + * invalid nodes from it. This is to avoid memory leaks for the case the UST + * notify socket is never received. + */ +static void sanitize_wait_queue(struct ust_reg_wait_queue *wait_queue) +{ + int ret, nb_fd = 0, i; + unsigned int fd_added = 0; + struct lttng_poll_event events; + struct ust_reg_wait_node *wait_node = NULL, *tmp_wait_node; + + assert(wait_queue); + + lttng_poll_init(&events); + + /* Just skip everything for an empty queue. */ + if (!wait_queue->count) { + goto end; + } + + ret = lttng_poll_create(&events, wait_queue->count, LTTNG_CLOEXEC); + if (ret < 0) { + goto error_create; + } + + cds_list_for_each_entry_safe(wait_node, tmp_wait_node, + &wait_queue->head, head) { + assert(wait_node->app); + ret = lttng_poll_add(&events, wait_node->app->sock, + LPOLLHUP | LPOLLERR); + if (ret < 0) { + goto error; + } + + fd_added = 1; + } + + if (!fd_added) { + goto end; + } + + /* + * Poll but don't block so we can quickly identify the faulty events and + * clean them afterwards from the wait queue. + */ + ret = lttng_poll_wait(&events, 0); + if (ret < 0) { + goto error; + } + nb_fd = ret; + + for (i = 0; i < nb_fd; i++) { + /* Get faulty FD. */ + uint32_t revents = LTTNG_POLL_GETEV(&events, i); + int pollfd = LTTNG_POLL_GETFD(&events, i); + + if (!revents) { + /* No activity for this FD (poll implementation). */ + continue; + } + + cds_list_for_each_entry_safe(wait_node, tmp_wait_node, + &wait_queue->head, head) { + if (pollfd == wait_node->app->sock && + (revents & (LPOLLHUP | LPOLLERR))) { + cds_list_del(&wait_node->head); + wait_queue->count--; + ust_app_destroy(wait_node->app); + free(wait_node); + break; + } + } + } + + if (nb_fd > 0) { + DBG("Wait queue sanitized, %d node were cleaned up", nb_fd); + } + +end: + lttng_poll_clean(&events); + return; + +error: + lttng_poll_clean(&events); +error_create: + ERR("Unable to sanitize wait queue"); + return; +} + /* * Dispatch request from the registration threads to the application * communication thread. */ static void *thread_dispatch_ust_registration(void *data) { - int ret; - struct cds_wfq_node *node; + int ret, err = -1; + struct cds_wfcq_node *node; struct ust_command *ust_cmd = NULL; + struct ust_reg_wait_node *wait_node = NULL, *tmp_wait_node; + struct ust_reg_wait_queue wait_queue = { + .count = 0, + }; + + health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_REG_DISPATCH); + + if (testpoint(sessiond_thread_app_reg_dispatch)) { + goto error_testpoint; + } + + health_code_update(); + + CDS_INIT_LIST_HEAD(&wait_queue.head); DBG("[thread] Dispatch UST command started"); while (!CMM_LOAD_SHARED(dispatch_thread_exit)) { + health_code_update(); + /* Atomically prepare the queue futex */ futex_nto1_prepare(&ust_cmd_queue.futex); do { + struct ust_app *app = NULL; + ust_cmd = NULL; + + /* + * Make sure we don't have node(s) that have hung up before receiving + * the notify socket. This is to clean the list in order to avoid + * memory leaks from notify socket that are never seen. + */ + sanitize_wait_queue(&wait_queue); + + health_code_update(); /* Dequeue command for registration */ - node = cds_wfq_dequeue_blocking(&ust_cmd_queue.queue); + node = cds_wfcq_dequeue_blocking(&ust_cmd_queue.head, &ust_cmd_queue.tail); if (node == NULL) { DBG("Woken up but nothing in the UST command queue"); /* Continue thread execution */ @@ -1217,34 +1867,193 @@ static void *thread_dispatch_ust_registration(void *data) ust_cmd->reg_msg.uid, ust_cmd->reg_msg.gid, ust_cmd->sock, ust_cmd->reg_msg.name, ust_cmd->reg_msg.major, ust_cmd->reg_msg.minor); - /* - * Inform apps thread of the new application registration. This - * call is blocking so we can be assured that the data will be read - * at some point in time or wait to the end of the world :) - */ - ret = write(apps_cmd_pipe[1], ust_cmd, - sizeof(struct ust_command)); - if (ret < 0) { - PERROR("write apps cmd pipe"); - if (errno == EBADF) { + + if (ust_cmd->reg_msg.type == USTCTL_SOCKET_CMD) { + wait_node = zmalloc(sizeof(*wait_node)); + if (!wait_node) { + PERROR("zmalloc wait_node dispatch"); + ret = close(ust_cmd->sock); + if (ret < 0) { + PERROR("close ust sock dispatch %d", ust_cmd->sock); + } + lttng_fd_put(LTTNG_FD_APPS, 1); + free(ust_cmd); + goto error; + } + CDS_INIT_LIST_HEAD(&wait_node->head); + + /* Create application object if socket is CMD. */ + wait_node->app = ust_app_create(&ust_cmd->reg_msg, + ust_cmd->sock); + if (!wait_node->app) { + ret = close(ust_cmd->sock); + if (ret < 0) { + PERROR("close ust sock dispatch %d", ust_cmd->sock); + } + lttng_fd_put(LTTNG_FD_APPS, 1); + free(wait_node); + free(ust_cmd); + continue; + } + /* + * Add application to the wait queue so we can set the notify + * socket before putting this object in the global ht. + */ + cds_list_add(&wait_node->head, &wait_queue.head); + wait_queue.count++; + + free(ust_cmd); + /* + * We have to continue here since we don't have the notify + * socket and the application MUST be added to the hash table + * only at that moment. + */ + continue; + } else { + /* + * Look for the application in the local wait queue and set the + * notify socket if found. + */ + cds_list_for_each_entry_safe(wait_node, tmp_wait_node, + &wait_queue.head, head) { + health_code_update(); + if (wait_node->app->pid == ust_cmd->reg_msg.pid) { + wait_node->app->notify_sock = ust_cmd->sock; + cds_list_del(&wait_node->head); + wait_queue.count--; + app = wait_node->app; + free(wait_node); + DBG3("UST app notify socket %d is set", ust_cmd->sock); + break; + } + } + + /* + * With no application at this stage the received socket is + * basically useless so close it before we free the cmd data + * structure for good. + */ + if (!app) { + ret = close(ust_cmd->sock); + if (ret < 0) { + PERROR("close ust sock dispatch %d", ust_cmd->sock); + } + lttng_fd_put(LTTNG_FD_APPS, 1); + } + free(ust_cmd); + } + + if (app) { + /* + * @session_lock_list + * + * Lock the global session list so from the register up to the + * registration done message, no thread can see the application + * and change its state. + */ + session_lock_list(); + rcu_read_lock(); + + /* + * Add application to the global hash table. This needs to be + * done before the update to the UST registry can locate the + * application. + */ + ust_app_add(app); + + /* Set app version. This call will print an error if needed. */ + (void) ust_app_version(app); + + /* Send notify socket through the notify pipe. */ + ret = send_socket_to_thread(apps_cmd_notify_pipe[1], + app->notify_sock); + if (ret < 0) { + rcu_read_unlock(); + session_unlock_list(); /* - * We can't inform the application thread to process - * registration. We will exit or else application - * registration will not occur and tracing will never - * start. + * No notify thread, stop the UST tracing. However, this is + * not an internal error of the this thread thus setting + * the health error code to a normal exit. */ + err = 0; goto error; } + + /* + * Update newly registered application with the tracing + * registry info already enabled information. + */ + update_ust_app(app->sock); + + /* + * Don't care about return value. Let the manage apps threads + * handle app unregistration upon socket close. + */ + (void) ust_app_register_done(app->sock); + + /* + * Even if the application socket has been closed, send the app + * to the thread and unregistration will take place at that + * place. + */ + ret = send_socket_to_thread(apps_cmd_pipe[1], app->sock); + if (ret < 0) { + rcu_read_unlock(); + session_unlock_list(); + /* + * No apps. thread, stop the UST tracing. However, this is + * not an internal error of the this thread thus setting + * the health error code to a normal exit. + */ + err = 0; + goto error; + } + + rcu_read_unlock(); + session_unlock_list(); } - free(ust_cmd); } while (node != NULL); + health_poll_entry(); /* Futex wait on queue. Blocking call on futex() */ futex_nto1_wait(&ust_cmd_queue.futex); + health_poll_exit(); } + /* Normal exit, no error */ + err = 0; error: + /* Clean up wait queue. */ + cds_list_for_each_entry_safe(wait_node, tmp_wait_node, + &wait_queue.head, head) { + cds_list_del(&wait_node->head); + wait_queue.count--; + free(wait_node); + } + + /* Empty command queue. */ + for (;;) { + /* Dequeue command for registration */ + node = cds_wfcq_dequeue_blocking(&ust_cmd_queue.head, &ust_cmd_queue.tail); + if (node == NULL) { + break; + } + ust_cmd = caa_container_of(node, struct ust_command, node); + ret = close(ust_cmd->sock); + if (ret < 0) { + PERROR("close ust sock exit dispatch %d", ust_cmd->sock); + } + lttng_fd_put(LTTNG_FD_APPS, 1); + free(ust_cmd); + } + +error_testpoint: DBG("Dispatch thread dying"); + if (err) { + health_error(); + ERR("Health error occurred in %s", __func__); + } + health_unregister(health_sessiond); return NULL; } @@ -1264,6 +2073,12 @@ static void *thread_registration_apps(void *data) DBG("[thread] Manage application registration started"); + health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_REG); + + if (testpoint(sessiond_thread_registration_apps)) { + goto error_testpoint; + } + ret = lttcomm_listen_unix_sock(apps_sock); if (ret < 0) { goto error_listen; @@ -1273,7 +2088,7 @@ static void *thread_registration_apps(void *data) * Pass 2 as size here for the thread quit pipe and apps socket. Nothing * more will be added to this poll set. */ - ret = create_thread_poll_set(&events, 2); + ret = sessiond_set_thread_pollset(&events, 2); if (ret < 0) { goto error_create_poll; } @@ -1295,13 +2110,11 @@ static void *thread_registration_apps(void *data) while (1) { DBG("Accepting application registration"); - nb_fd = LTTNG_POLL_GETNB(&events); - /* Inifinite blocking call, waiting for transmission */ restart: - health_poll_update(&health_thread_app_reg); + health_poll_entry(); ret = lttng_poll_wait(&events, -1); - health_poll_update(&health_thread_app_reg); + health_poll_exit(); if (ret < 0) { /* * Restart interrupted system call. @@ -1312,15 +2125,22 @@ static void *thread_registration_apps(void *data) goto error; } + nb_fd = ret; + for (i = 0; i < nb_fd; i++) { - health_code_update(&health_thread_app_reg); + health_code_update(); /* Fetch once the poll data */ revents = LTTNG_POLL_GETEV(&events, i); pollfd = LTTNG_POLL_GETFD(&events, i); + if (!revents) { + /* No activity for this FD (poll implementation). */ + continue; + } + /* Thread quit pipe has been closed. Killing thread. */ - ret = check_thread_quit_pipe(pollfd, revents); + ret = sessiond_check_thread_quit_pipe(pollfd, revents); if (ret) { err = 0; goto exit; @@ -1337,10 +2157,32 @@ static void *thread_registration_apps(void *data) goto error; } + /* + * Set socket timeout for both receiving and ending. + * app_socket_timeout is in seconds, whereas + * lttcomm_setsockopt_rcv_timeout and + * lttcomm_setsockopt_snd_timeout expect msec as + * parameter. + */ + (void) lttcomm_setsockopt_rcv_timeout(sock, + app_socket_timeout * 1000); + (void) lttcomm_setsockopt_snd_timeout(sock, + app_socket_timeout * 1000); + + /* + * Set the CLOEXEC flag. Return code is useless because + * either way, the show must go on. + */ + (void) utils_set_fd_cloexec(sock); + /* Create UST registration command for enqueuing */ ust_cmd = zmalloc(sizeof(struct ust_command)); if (ust_cmd == NULL) { PERROR("ust command zmalloc"); + ret = close(sock); + if (ret) { + PERROR("close"); + } goto error; } @@ -1359,16 +2201,12 @@ static void *thread_registration_apps(void *data) sock = -1; continue; } - health_code_update(&health_thread_app_reg); - ret = lttcomm_recv_unix_sock(sock, &ust_cmd->reg_msg, - sizeof(struct ust_register_msg)); - if (ret < 0 || ret < sizeof(struct ust_register_msg)) { - if (ret < 0) { - PERROR("lttcomm_recv_unix_sock register apps"); - } else { - ERR("Wrong size received on apps register"); - } + + health_code_update(); + ret = ust_app_recv_registration(sock, &ust_cmd->reg_msg); + if (ret < 0) { free(ust_cmd); + /* Close socket of the application. */ ret = close(sock); if (ret) { PERROR("close"); @@ -1377,7 +2215,7 @@ static void *thread_registration_apps(void *data) sock = -1; continue; } - health_code_update(&health_thread_app_reg); + health_code_update(); ust_cmd->sock = sock; sock = -1; @@ -1393,11 +2231,11 @@ static void *thread_registration_apps(void *data) * Lock free enqueue the registration request. The red pill * has been taken! This apps will be part of the *system*. */ - cds_wfq_enqueue(&ust_cmd_queue.queue, &ust_cmd->node); + cds_wfcq_enqueue(&ust_cmd_queue.head, &ust_cmd_queue.tail, &ust_cmd->node); /* * Wake the registration queue futex. Implicit memory - * barrier with the exchange in cds_wfq_enqueue. + * barrier with the exchange in cds_wfcq_enqueue. */ futex_nto1_wake(&ust_cmd_queue.futex); } @@ -1407,12 +2245,6 @@ static void *thread_registration_apps(void *data) exit: error: - if (err) { - health_error(&health_thread_app_reg); - ERR("Health error occurred in %s", __func__); - } - health_exit(&health_thread_app_reg); - /* Notify that the registration thread is gone */ notify_ust_apps(0); @@ -1435,7 +2267,13 @@ error_poll_add: lttng_poll_clean(&events); error_listen: error_create_poll: +error_testpoint: DBG("UST Registration thread cleanup complete"); + if (err) { + health_error(); + ERR("Health error occurred in %s", __func__); + } + health_unregister(health_sessiond); return NULL; } @@ -1446,59 +2284,115 @@ error_create_poll: */ static int spawn_consumer_thread(struct consumer_data *consumer_data) { - int ret; + int ret, clock_ret; struct timespec timeout; - timeout.tv_sec = DEFAULT_SEM_WAIT_TIMEOUT; - timeout.tv_nsec = 0; + /* Make sure we set the readiness flag to 0 because we are NOT ready */ + consumer_data->consumer_thread_is_ready = 0; - /* Setup semaphore */ - ret = sem_init(&consumer_data->sem, 0, 0); - if (ret < 0) { - PERROR("sem_init consumer semaphore"); + /* Setup pthread condition */ + ret = pthread_condattr_init(&consumer_data->condattr); + if (ret) { + errno = ret; + PERROR("pthread_condattr_init consumer data"); goto error; } - ret = pthread_create(&consumer_data->thread, NULL, - thread_manage_consumer, consumer_data); - if (ret != 0) { + /* + * Set the monotonic clock in order to make sure we DO NOT jump in time + * between the clock_gettime() call and the timedwait call. See bug #324 + * for a more details and how we noticed it. + */ + ret = pthread_condattr_setclock(&consumer_data->condattr, CLOCK_MONOTONIC); + if (ret) { + errno = ret; + PERROR("pthread_condattr_setclock consumer data"); + goto error; + } + + ret = pthread_cond_init(&consumer_data->cond, &consumer_data->condattr); + if (ret) { + errno = ret; + PERROR("pthread_cond_init consumer data"); + goto error; + } + + ret = pthread_create(&consumer_data->thread, NULL, thread_manage_consumer, + consumer_data); + if (ret) { + errno = ret; PERROR("pthread_create consumer"); ret = -1; goto error; } + /* We are about to wait on a pthread condition */ + pthread_mutex_lock(&consumer_data->cond_mutex); + /* Get time for sem_timedwait absolute timeout */ - ret = clock_gettime(CLOCK_REALTIME, &timeout); - if (ret < 0) { - PERROR("clock_gettime spawn consumer"); - /* Infinite wait for the kconsumerd thread to be ready */ - ret = sem_wait(&consumer_data->sem); - } else { - /* Normal timeout if the gettime was successful */ - timeout.tv_sec += DEFAULT_SEM_WAIT_TIMEOUT; - ret = sem_timedwait(&consumer_data->sem, &timeout); + clock_ret = clock_gettime(CLOCK_MONOTONIC, &timeout); + /* + * Set the timeout for the condition timed wait even if the clock gettime + * call fails since we might loop on that call and we want to avoid to + * increment the timeout too many times. + */ + timeout.tv_sec += DEFAULT_SEM_WAIT_TIMEOUT; + + /* + * The following loop COULD be skipped in some conditions so this is why we + * set ret to 0 in order to make sure at least one round of the loop is + * done. + */ + ret = 0; + + /* + * Loop until the condition is reached or when a timeout is reached. Note + * that the pthread_cond_timedwait(P) man page specifies that EINTR can NOT + * be returned but the pthread_cond(3), from the glibc-doc, says that it is + * possible. This loop does not take any chances and works with both of + * them. + */ + while (!consumer_data->consumer_thread_is_ready && ret != ETIMEDOUT) { + if (clock_ret < 0) { + PERROR("clock_gettime spawn consumer"); + /* Infinite wait for the consumerd thread to be ready */ + ret = pthread_cond_wait(&consumer_data->cond, + &consumer_data->cond_mutex); + } else { + ret = pthread_cond_timedwait(&consumer_data->cond, + &consumer_data->cond_mutex, &timeout); + } } - if (ret < 0) { - if (errno == ETIMEDOUT) { + /* Release the pthread condition */ + pthread_mutex_unlock(&consumer_data->cond_mutex); + + if (ret != 0) { + errno = ret; + if (ret == ETIMEDOUT) { + int pth_ret; + /* * Call has timed out so we kill the kconsumerd_thread and return * an error. */ - ERR("The consumer thread was never ready. Killing it"); - ret = pthread_cancel(consumer_data->thread); - if (ret < 0) { + ERR("Condition timed out. The consumer thread was never ready." + " Killing it"); + pth_ret = pthread_cancel(consumer_data->thread); + if (pth_ret < 0) { PERROR("pthread_cancel consumer thread"); } } else { - PERROR("semaphore wait failed consumer thread"); + PERROR("pthread_cond_wait failed consumer thread"); } + /* Caller is expecting a negative value on failure. */ + ret = -1; goto error; } pthread_mutex_lock(&consumer_data->pid_mutex); if (consumer_data->pid == 0) { - ERR("Kconsumerd did not start"); + ERR("Consumerd did not start"); pthread_mutex_unlock(&consumer_data->pid_mutex); goto error; } @@ -1516,13 +2410,13 @@ error: static int join_consumer_thread(struct consumer_data *consumer_data) { void *status; - int ret; /* Consumer pid must be a real one. */ if (consumer_data->pid > 0) { + int ret; ret = kill(consumer_data->pid, SIGTERM); if (ret) { - ERR("Error killing consumer daemon"); + PERROR("Error killing consumer daemon"); return ret; } return pthread_join(consumer_data->thread, &status); @@ -1553,9 +2447,12 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data) */ if (opt_verbose_consumer) { verbosity = "--verbose"; - } else { + } else if (lttng_opt_quiet) { verbosity = "--quiet"; + } else { + verbosity = ""; } + switch (consumer_data->type) { case LTTNG_CONSUMER_KERNEL: /* @@ -1578,13 +2475,15 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data) consumer_to_use = consumerd32_bin; } else { DBG("Could not find any valid consumerd executable"); + ret = -EINVAL; break; } DBG("Using kernel consumer at: %s", consumer_to_use); - execl(consumer_to_use, + ret = execl(consumer_to_use, "lttng-consumerd", verbosity, "-k", "--consumerd-cmd-sock", consumer_data->cmd_unix_sock_path, "--consumerd-err-sock", consumer_data->err_unix_sock_path, + "--group", tracing_group_name, NULL); break; case LTTNG_CONSUMER64_UST: @@ -1595,7 +2494,7 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data) char *tmp; size_t tmplen; - tmp = getenv("LD_LIBRARY_PATH"); + tmp = lttng_secure_getenv("LD_LIBRARY_PATH"); if (!tmp) { tmp = ""; } @@ -1615,6 +2514,7 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data) ret = putenv(tmpnew); if (ret) { ret = -errno; + free(tmpnew); goto error; } } @@ -1622,13 +2522,11 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data) ret = execl(consumerd64_bin, "lttng-consumerd", verbosity, "-u", "--consumerd-cmd-sock", consumer_data->cmd_unix_sock_path, "--consumerd-err-sock", consumer_data->err_unix_sock_path, + "--group", tracing_group_name, NULL); if (consumerd64_libdir[0] != '\0') { free(tmpnew); } - if (ret) { - goto error; - } break; } case LTTNG_CONSUMER32_UST: @@ -1639,7 +2537,7 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data) char *tmp; size_t tmplen; - tmp = getenv("LD_LIBRARY_PATH"); + tmp = lttng_secure_getenv("LD_LIBRARY_PATH"); if (!tmp) { tmp = ""; } @@ -1659,6 +2557,7 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data) ret = putenv(tmpnew); if (ret) { ret = -errno; + free(tmpnew); goto error; } } @@ -1666,13 +2565,11 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data) ret = execl(consumerd32_bin, "lttng-consumerd", verbosity, "-u", "--consumerd-cmd-sock", consumer_data->cmd_unix_sock_path, "--consumerd-err-sock", consumer_data->err_unix_sock_path, + "--group", tracing_group_name, NULL); if (consumerd32_libdir[0] != '\0') { free(tmpnew); } - if (ret) { - goto error; - } break; } default: @@ -1680,8 +2577,9 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data) exit(EXIT_FAILURE); } if (errno != 0) { - PERROR("kernel start consumer exec"); + PERROR("Consumer execl()"); } + /* Reaching this point, we got a failure on our execl(). */ exit(EXIT_FAILURE); } else if (pid > 0) { ret = pid; @@ -1700,6 +2598,16 @@ static int start_consumerd(struct consumer_data *consumer_data) { int ret; + /* + * Set the listen() state on the socket since there is a possible race + * between the exec() of the consumer daemon and this call if place in the + * consumer thread. See bug #366 for more details. + */ + ret = lttcomm_listen_unix_sock(consumer_data->err_sock); + if (ret < 0) { + goto error; + } + pthread_mutex_lock(&consumer_data->pid_mutex); if (consumer_data->pid != 0) { pthread_mutex_unlock(&consumer_data->pid_mutex); @@ -1729,23 +2637,15 @@ end: return 0; error: - return ret; -} - -/* - * Compute health status of each consumer. If one of them is zero (bad - * state), we return 0. - */ -static int check_consumer_health(void) -{ - int ret; - - ret = health_check_state(&kconsumer_data.health) && - health_check_state(&ustconsumer32_data.health) && - health_check_state(&ustconsumer64_data.health); - - DBG3("Health consumer check %d", ret); + /* Cleanup already created sockets on error. */ + if (consumer_data->err_sock >= 0) { + int err; + err = close(consumer_data->err_sock); + if (err < 0) { + PERROR("close consumer data error socket"); + } + } return ret; } @@ -1791,7 +2691,7 @@ error_version: PERROR("close"); } kernel_tracer_fd = -1; - return LTTCOMM_KERN_VERSION; + return LTTNG_ERR_KERN_VERSION; error_modules: ret = close(kernel_tracer_fd); @@ -1806,9 +2706,9 @@ error: WARN("No kernel tracer available"); kernel_tracer_fd = -1; if (!is_root) { - return LTTCOMM_NEED_ROOT_SESSIOND; + return LTTNG_ERR_NEED_ROOT_SESSIOND; } else { - return LTTCOMM_KERN_NA; + return LTTNG_ERR_KERN_NA; } } @@ -1817,6 +2717,8 @@ error: * Copy consumer output from the tracing session to the domain session. The * function also applies the right modification on a per domain basis for the * trace files destination directory. + * + * Should *NOT* be called with RCU read-side lock held. */ static int copy_session_consumer(int domain, struct ltt_session *session) { @@ -1830,14 +2732,29 @@ static int copy_session_consumer(int domain, struct ltt_session *session) switch (domain) { case LTTNG_DOMAIN_KERNEL: DBG3("Copying tracing session consumer output in kernel session"); + /* + * XXX: We should audit the session creation and what this function + * does "extra" in order to avoid a destroy since this function is used + * in the domain session creation (kernel and ust) only. Same for UST + * domain. + */ + if (session->kernel_session->consumer) { + consumer_output_put(session->kernel_session->consumer); + } session->kernel_session->consumer = consumer_copy_output(session->consumer); /* Ease our life a bit for the next part */ consumer = session->kernel_session->consumer; dir_name = DEFAULT_KERNEL_TRACE_DIR; break; + case LTTNG_DOMAIN_JUL: + case LTTNG_DOMAIN_LOG4J: + case LTTNG_DOMAIN_PYTHON: case LTTNG_DOMAIN_UST: DBG3("Copying tracing session consumer output in UST session"); + if (session->ust_session->consumer) { + consumer_output_put(session->ust_session->consumer); + } session->ust_session->consumer = consumer_copy_output(session->consumer); /* Ease our life a bit for the next part */ @@ -1845,7 +2762,7 @@ static int copy_session_consumer(int domain, struct ltt_session *session) dir_name = DEFAULT_UST_TRACE_DIR; break; default: - ret = LTTCOMM_UNKNOWN_DOMAIN; + ret = LTTNG_ERR_UNKNOWN_DOMAIN; goto error; } @@ -1854,7 +2771,7 @@ static int copy_session_consumer(int domain, struct ltt_session *session) sizeof(consumer->subdir) - strlen(consumer->subdir) - 1); DBG3("Copy session consumer subdir %s", consumer->subdir); - ret = LTTCOMM_OK; + ret = LTTNG_OK; error: return ret; @@ -1862,6 +2779,8 @@ error: /* * Create an UST session and add it to the session ust list. + * + * Should *NOT* be called with RCU read-side lock held. */ static int create_ust_session(struct ltt_session *session, struct lttng_domain *domain) @@ -1874,33 +2793,48 @@ static int create_ust_session(struct ltt_session *session, assert(session->consumer); switch (domain->type) { + case LTTNG_DOMAIN_JUL: + case LTTNG_DOMAIN_LOG4J: + case LTTNG_DOMAIN_PYTHON: case LTTNG_DOMAIN_UST: break; default: ERR("Unknown UST domain on create session %d", domain->type); - ret = LTTCOMM_UNKNOWN_DOMAIN; + ret = LTTNG_ERR_UNKNOWN_DOMAIN; goto error; } DBG("Creating UST session"); - lus = trace_ust_create_session(session->path, session->id, domain); + lus = trace_ust_create_session(session->id); if (lus == NULL) { - ret = LTTCOMM_UST_SESS_FAIL; + ret = LTTNG_ERR_UST_SESS_FAIL; goto error; } lus->uid = session->uid; lus->gid = session->gid; + lus->output_traces = session->output_traces; + lus->snapshot_mode = session->snapshot_mode; + lus->live_timer_interval = session->live_timer; session->ust_session = lus; - + if (session->shm_path[0]) { + strncpy(lus->root_shm_path, session->shm_path, + sizeof(lus->root_shm_path)); + lus->root_shm_path[sizeof(lus->root_shm_path) - 1] = '\0'; + strncpy(lus->shm_path, session->shm_path, + sizeof(lus->shm_path)); + lus->shm_path[sizeof(lus->shm_path) - 1] = '\0'; + strncat(lus->shm_path, "/ust", + sizeof(lus->shm_path) - strlen(lus->shm_path) - 1); + } /* Copy session output to the newly created UST session */ ret = copy_session_consumer(domain->type, session); - if (ret != LTTCOMM_OK) { + if (ret != LTTNG_OK) { goto error; } - return LTTCOMM_OK; + return LTTNG_OK; error: free(lus); @@ -1919,7 +2853,7 @@ static int create_kernel_session(struct ltt_session *session) ret = kernel_create_session(session, kernel_tracer_fd); if (ret < 0) { - ret = LTTCOMM_KERN_SESS_FAIL; + ret = LTTNG_ERR_KERN_SESS_FAIL; goto error; } @@ -1928,7 +2862,7 @@ static int create_kernel_session(struct ltt_session *session) /* Copy session output to the newly created Kernel session */ ret = copy_session_consumer(LTTNG_DOMAIN_KERNEL, session); - if (ret != LTTCOMM_OK) { + if (ret != LTTNG_OK) { goto error; } @@ -1939,7 +2873,7 @@ static int create_kernel_session(struct ltt_session *session) session->kernel_session->consumer->dst.trace_path, S_IRWXU | S_IRWXG, session->uid, session->gid); if (ret < 0) { - if (ret != -EEXIST) { + if (errno != EEXIST) { ERR("Trace directory creation error"); goto error; } @@ -1948,8 +2882,10 @@ static int create_kernel_session(struct ltt_session *session) session->kernel_session->uid = session->uid; session->kernel_session->gid = session->gid; + session->kernel_session->output_traces = session->output_traces; + session->kernel_session->snapshot_mode = session->snapshot_mode; - return LTTCOMM_OK; + return LTTNG_OK; error: trace_kernel_destroy_session(session->kernel_session); @@ -1987,11 +2923,13 @@ static unsigned int lttng_sessions_count(uid_t uid, gid_t gid) * Return any error encountered or 0 for success. * * "sock" is only used for special-case var. len data. + * + * Should *NOT* be called with RCU read-side lock held. */ static int process_client_msg(struct command_ctx *cmd_ctx, int sock, int *sock_error) { - int ret = LTTCOMM_OK; + int ret = LTTNG_OK; int need_tracing_session = 1; int need_domain; @@ -2001,11 +2939,20 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock, switch (cmd_ctx->lsm->cmd_type) { case LTTNG_CREATE_SESSION: + case LTTNG_CREATE_SESSION_SNAPSHOT: + case LTTNG_CREATE_SESSION_LIVE: case LTTNG_DESTROY_SESSION: case LTTNG_LIST_SESSIONS: case LTTNG_LIST_DOMAINS: case LTTNG_START_TRACE: case LTTNG_STOP_TRACE: + case LTTNG_DATA_PENDING: + case LTTNG_SNAPSHOT_ADD_OUTPUT: + case LTTNG_SNAPSHOT_DEL_OUTPUT: + case LTTNG_SNAPSHOT_LIST_OUTPUT: + case LTTNG_SNAPSHOT_RECORD: + case LTTNG_SAVE_SESSION: + case LTTNG_SET_SESSION_SHM_PATH: need_domain = 0; break; default: @@ -2015,9 +2962,9 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock, if (opt_no_kernel && need_domain && cmd_ctx->lsm->domain.type == LTTNG_DOMAIN_KERNEL) { if (!is_root) { - ret = LTTCOMM_NEED_ROOT_SESSIOND; + ret = LTTNG_ERR_NEED_ROOT_SESSIOND; } else { - ret = LTTCOMM_KERN_NA; + ret = LTTNG_ERR_KERN_NA; } goto error; } @@ -2026,7 +2973,7 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock, if (cmd_ctx->lsm->cmd_type == LTTNG_REGISTER_CONSUMER) { pthread_mutex_lock(&kconsumer_data.pid_mutex); if (kconsumer_data.pid > 0) { - ret = LTTCOMM_KERN_CONSUMER_FAIL; + ret = LTTNG_ERR_KERN_CONSUMER_FAIL; pthread_mutex_unlock(&kconsumer_data.pid_mutex); goto error; } @@ -2045,6 +2992,8 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock, case LTTNG_LIST_DOMAINS: case LTTNG_LIST_CHANNELS: case LTTNG_LIST_EVENTS: + case LTTNG_LIST_SYSCALLS: + case LTTNG_LIST_TRACKER_PIDS: break; default: /* Setup lttng message with no payload */ @@ -2058,10 +3007,14 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock, /* Commands that DO NOT need a session. */ switch (cmd_ctx->lsm->cmd_type) { case LTTNG_CREATE_SESSION: + case LTTNG_CREATE_SESSION_SNAPSHOT: + case LTTNG_CREATE_SESSION_LIVE: case LTTNG_CALIBRATE: case LTTNG_LIST_SESSIONS: case LTTNG_LIST_TRACEPOINTS: + case LTTNG_LIST_SYSCALLS: case LTTNG_LIST_TRACEPOINT_FIELDS: + case LTTNG_SAVE_SESSION: need_tracing_session = 0; break; default: @@ -2074,12 +3027,7 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock, session_lock_list(); cmd_ctx->session = session_find_by_name(cmd_ctx->lsm->session.name); if (cmd_ctx->session == NULL) { - if (cmd_ctx->lsm->session.name != NULL) { - ret = LTTCOMM_SESS_NOT_FOUND; - } else { - /* If no session name specified */ - ret = LTTCOMM_SELECT_SESS; - } + ret = LTTNG_ERR_SESS_NOT_FOUND; goto error; } else { /* Acquire lock for the session */ @@ -2088,6 +3036,39 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock, break; } + /* + * Commands that need a valid session but should NOT create one if none + * exists. Instead of creating one and destroying it when the command is + * handled, process that right before so we save some round trip in useless + * code path. + */ + switch (cmd_ctx->lsm->cmd_type) { + case LTTNG_DISABLE_CHANNEL: + case LTTNG_DISABLE_EVENT: + switch (cmd_ctx->lsm->domain.type) { + case LTTNG_DOMAIN_KERNEL: + if (!cmd_ctx->session->kernel_session) { + ret = LTTNG_ERR_NO_CHANNEL; + goto error; + } + break; + case LTTNG_DOMAIN_JUL: + case LTTNG_DOMAIN_LOG4J: + case LTTNG_DOMAIN_PYTHON: + case LTTNG_DOMAIN_UST: + if (!cmd_ctx->session->ust_session) { + ret = LTTNG_ERR_NO_CHANNEL; + goto error; + } + break; + default: + ret = LTTNG_ERR_UNKNOWN_DOMAIN; + goto error; + } + default: + break; + } + if (!need_domain) { goto skip_domain; } @@ -2098,7 +3079,7 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock, switch (cmd_ctx->lsm->domain.type) { case LTTNG_DOMAIN_KERNEL: if (!is_root) { - ret = LTTCOMM_NEED_ROOT_SESSIOND; + ret = LTTNG_ERR_NEED_ROOT_SESSIOND; goto error; } @@ -2113,7 +3094,7 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock, /* Consumer is in an ERROR state. Report back to client */ if (uatomic_read(&kernel_consumerd_state) == CONSUMER_ERROR) { - ret = LTTCOMM_NO_KERNCONSUMERD; + ret = LTTNG_ERR_NO_KERNCONSUMERD; goto error; } @@ -2122,7 +3103,7 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock, if (cmd_ctx->session->kernel_session == NULL) { ret = create_kernel_session(cmd_ctx->session); if (ret < 0) { - ret = LTTCOMM_KERN_SESS_FAIL; + ret = LTTNG_ERR_KERN_SESS_FAIL; goto error; } } @@ -2130,12 +3111,11 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock, /* Start the kernel consumer daemon */ pthread_mutex_lock(&kconsumer_data.pid_mutex); if (kconsumer_data.pid == 0 && - cmd_ctx->lsm->cmd_type != LTTNG_REGISTER_CONSUMER && - cmd_ctx->session->start_consumer) { + cmd_ctx->lsm->cmd_type != LTTNG_REGISTER_CONSUMER) { pthread_mutex_unlock(&kconsumer_data.pid_mutex); ret = start_consumerd(&kconsumer_data); if (ret < 0) { - ret = LTTCOMM_KERN_CONSUMER_FAIL; + ret = LTTNG_ERR_KERN_CONSUMER_FAIL; goto error; } uatomic_set(&kernel_consumerd_state, CONSUMER_STARTED); @@ -2155,11 +3135,18 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock, } break; + case LTTNG_DOMAIN_JUL: + case LTTNG_DOMAIN_LOG4J: + case LTTNG_DOMAIN_PYTHON: case LTTNG_DOMAIN_UST: { + if (!ust_app_supported()) { + ret = LTTNG_ERR_NO_UST; + goto error; + } /* Consumer is in an ERROR state. Report back to client */ if (uatomic_read(&ust_consumerd_state) == CONSUMER_ERROR) { - ret = LTTCOMM_NO_USTCONSUMERD; + ret = LTTNG_ERR_NO_USTCONSUMERD; goto error; } @@ -2168,7 +3155,7 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock, if (cmd_ctx->session->ust_session == NULL) { ret = create_ust_session(cmd_ctx->session, &cmd_ctx->lsm->domain); - if (ret != LTTCOMM_OK) { + if (ret != LTTNG_OK) { goto error; } } @@ -2178,12 +3165,11 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock, pthread_mutex_lock(&ustconsumer64_data.pid_mutex); if (consumerd64_bin[0] != '\0' && ustconsumer64_data.pid == 0 && - cmd_ctx->lsm->cmd_type != LTTNG_REGISTER_CONSUMER && - cmd_ctx->session->start_consumer) { + cmd_ctx->lsm->cmd_type != LTTNG_REGISTER_CONSUMER) { pthread_mutex_unlock(&ustconsumer64_data.pid_mutex); ret = start_consumerd(&ustconsumer64_data); if (ret < 0) { - ret = LTTCOMM_UST_CONSUMER64_FAIL; + ret = LTTNG_ERR_UST_CONSUMER64_FAIL; uatomic_set(&ust_consumerd64_fd, -EINVAL); goto error; } @@ -2205,14 +3191,14 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock, } /* 32-bit */ + pthread_mutex_lock(&ustconsumer32_data.pid_mutex); if (consumerd32_bin[0] != '\0' && ustconsumer32_data.pid == 0 && - cmd_ctx->lsm->cmd_type != LTTNG_REGISTER_CONSUMER && - cmd_ctx->session->start_consumer) { + cmd_ctx->lsm->cmd_type != LTTNG_REGISTER_CONSUMER) { pthread_mutex_unlock(&ustconsumer32_data.pid_mutex); ret = start_consumerd(&ustconsumer32_data); if (ret < 0) { - ret = LTTCOMM_UST_CONSUMER32_FAIL; + ret = LTTNG_ERR_UST_CONSUMER32_FAIL; uatomic_set(&ust_consumerd32_fd, -EINVAL); goto error; } @@ -2244,15 +3230,18 @@ skip_domain: if (cmd_ctx->lsm->cmd_type == LTTNG_START_TRACE || cmd_ctx->lsm->cmd_type == LTTNG_STOP_TRACE) { switch (cmd_ctx->lsm->domain.type) { + case LTTNG_DOMAIN_JUL: + case LTTNG_DOMAIN_LOG4J: + case LTTNG_DOMAIN_PYTHON: case LTTNG_DOMAIN_UST: if (uatomic_read(&ust_consumerd_state) != CONSUMER_STARTED) { - ret = LTTCOMM_NO_USTCONSUMERD; + ret = LTTNG_ERR_NO_USTCONSUMERD; goto error; } break; case LTTNG_DOMAIN_KERNEL: if (uatomic_read(&kernel_consumerd_state) != CONSUMER_STARTED) { - ret = LTTCOMM_NO_KERNCONSUMERD; + ret = LTTNG_ERR_NO_KERNCONSUMERD; goto error; } break; @@ -2267,7 +3256,22 @@ skip_domain: if (!session_access_ok(cmd_ctx->session, LTTNG_SOCK_GET_UID_CRED(&cmd_ctx->creds), LTTNG_SOCK_GET_GID_CRED(&cmd_ctx->creds))) { - ret = LTTCOMM_EPERM; + ret = LTTNG_ERR_EPERM; + goto error; + } + } + + /* + * Send relayd information to consumer as soon as we have a domain and a + * session defined. + */ + if (cmd_ctx->session && need_domain) { + /* + * Setup relayd if not done yet. If the relayd information was already + * sent to the consumer, this call will gracefully return. + */ + ret = cmd_setup_relayd(cmd_ctx->session); + if (ret != LTTNG_OK) { goto error; } } @@ -2278,8 +3282,7 @@ skip_domain: { ret = cmd_add_context(cmd_ctx->session, cmd_ctx->lsm->domain.type, cmd_ctx->lsm->u.context.channel_name, - cmd_ctx->lsm->u.context.event_name, - &cmd_ctx->lsm->u.context.ctx); + &cmd_ctx->lsm->u.context.ctx, kernel_poll_pipe[1]); break; } case LTTNG_DISABLE_CHANNEL: @@ -2290,68 +3293,167 @@ skip_domain: } case LTTNG_DISABLE_EVENT: { + + /* + * FIXME: handle filter; for now we just receive the filter's + * bytecode along with the filter expression which are sent by + * liblttng-ctl and discard them. + * + * This fixes an issue where the client may block while sending + * the filter payload and encounter an error because the session + * daemon closes the socket without ever handling this data. + */ + size_t count = cmd_ctx->lsm->u.disable.expression_len + + cmd_ctx->lsm->u.disable.bytecode_len; + + if (count) { + char data[LTTNG_FILTER_MAX_LEN]; + + DBG("Discarding disable event command payload of size %zu", count); + while (count) { + ret = lttcomm_recv_unix_sock(sock, data, + count > sizeof(data) ? sizeof(data) : count); + if (ret < 0) { + goto error; + } + + count -= (size_t) ret; + } + } + /* FIXME: passing packed structure to non-packed pointer */ ret = cmd_disable_event(cmd_ctx->session, cmd_ctx->lsm->domain.type, cmd_ctx->lsm->u.disable.channel_name, - cmd_ctx->lsm->u.disable.name); + &cmd_ctx->lsm->u.disable.event); break; } - case LTTNG_DISABLE_ALL_EVENT: + case LTTNG_ENABLE_CHANNEL: { - DBG("Disabling all events"); - - ret = cmd_disable_event_all(cmd_ctx->session, cmd_ctx->lsm->domain.type, - cmd_ctx->lsm->u.disable.channel_name); + ret = cmd_enable_channel(cmd_ctx->session, &cmd_ctx->lsm->domain, + &cmd_ctx->lsm->u.channel.chan, kernel_poll_pipe[1]); break; } - case LTTNG_DISABLE_CONSUMER: + case LTTNG_TRACK_PID: { - ret = cmd_disable_consumer(cmd_ctx->lsm->domain.type, cmd_ctx->session); + ret = cmd_track_pid(cmd_ctx->session, + cmd_ctx->lsm->domain.type, + cmd_ctx->lsm->u.pid_tracker.pid); break; } - case LTTNG_ENABLE_CHANNEL: + case LTTNG_UNTRACK_PID: { - ret = cmd_enable_channel(cmd_ctx->session, cmd_ctx->lsm->domain.type, - &cmd_ctx->lsm->u.channel.chan, kernel_poll_pipe[1]); + ret = cmd_untrack_pid(cmd_ctx->session, + cmd_ctx->lsm->domain.type, + cmd_ctx->lsm->u.pid_tracker.pid); break; } - case LTTNG_ENABLE_CONSUMER: + case LTTNG_ENABLE_EVENT: { - /* - * XXX: 0 means that this URI should be applied on the session. Should - * be a DOMAIN enuam. - */ - ret = cmd_enable_consumer(cmd_ctx->lsm->domain.type, cmd_ctx->session); - if (ret != LTTCOMM_OK) { - goto error; + struct lttng_event_exclusion *exclusion = NULL; + struct lttng_filter_bytecode *bytecode = NULL; + char *filter_expression = NULL; + + /* Handle exclusion events and receive it from the client. */ + if (cmd_ctx->lsm->u.enable.exclusion_count > 0) { + size_t count = cmd_ctx->lsm->u.enable.exclusion_count; + + exclusion = zmalloc(sizeof(struct lttng_event_exclusion) + + (count * LTTNG_SYMBOL_NAME_LEN)); + if (!exclusion) { + ret = LTTNG_ERR_EXCLUSION_NOMEM; + goto error; + } + + DBG("Receiving var len exclusion event list from client ..."); + exclusion->count = count; + ret = lttcomm_recv_unix_sock(sock, exclusion->names, + count * LTTNG_SYMBOL_NAME_LEN); + if (ret <= 0) { + DBG("Nothing recv() from client var len data... continuing"); + *sock_error = 1; + free(exclusion); + ret = LTTNG_ERR_EXCLUSION_INVAL; + goto error; + } + } + + /* Get filter expression from client. */ + if (cmd_ctx->lsm->u.enable.expression_len > 0) { + size_t expression_len = + cmd_ctx->lsm->u.enable.expression_len; + + if (expression_len > LTTNG_FILTER_MAX_LEN) { + ret = LTTNG_ERR_FILTER_INVAL; + free(exclusion); + goto error; + } + + filter_expression = zmalloc(expression_len); + if (!filter_expression) { + free(exclusion); + ret = LTTNG_ERR_FILTER_NOMEM; + goto error; + } + + /* Receive var. len. data */ + DBG("Receiving var len filter's expression from client ..."); + ret = lttcomm_recv_unix_sock(sock, filter_expression, + expression_len); + if (ret <= 0) { + DBG("Nothing recv() from client car len data... continuing"); + *sock_error = 1; + free(filter_expression); + free(exclusion); + ret = LTTNG_ERR_FILTER_INVAL; + goto error; + } } - if (cmd_ctx->lsm->domain.type == 0) { - /* Add the URI for the UST session if a consumer is present. */ - if (cmd_ctx->session->ust_session && - cmd_ctx->session->ust_session->consumer) { - ret = cmd_enable_consumer(LTTNG_DOMAIN_UST, cmd_ctx->session); - } else if (cmd_ctx->session->kernel_session && - cmd_ctx->session->kernel_session->consumer) { - ret = cmd_enable_consumer(LTTNG_DOMAIN_KERNEL, - cmd_ctx->session); + /* Handle filter and get bytecode from client. */ + if (cmd_ctx->lsm->u.enable.bytecode_len > 0) { + size_t bytecode_len = cmd_ctx->lsm->u.enable.bytecode_len; + + if (bytecode_len > LTTNG_FILTER_MAX_LEN) { + ret = LTTNG_ERR_FILTER_INVAL; + free(filter_expression); + free(exclusion); + goto error; + } + + bytecode = zmalloc(bytecode_len); + if (!bytecode) { + free(filter_expression); + free(exclusion); + ret = LTTNG_ERR_FILTER_NOMEM; + goto error; + } + + /* Receive var. len. data */ + DBG("Receiving var len filter's bytecode from client ..."); + ret = lttcomm_recv_unix_sock(sock, bytecode, bytecode_len); + if (ret <= 0) { + DBG("Nothing recv() from client car len data... continuing"); + *sock_error = 1; + free(filter_expression); + free(bytecode); + free(exclusion); + ret = LTTNG_ERR_FILTER_INVAL; + goto error; + } + + if ((bytecode->len + sizeof(*bytecode)) != bytecode_len) { + free(filter_expression); + free(bytecode); + free(exclusion); + ret = LTTNG_ERR_FILTER_INVAL; + goto error; } } - break; - } - case LTTNG_ENABLE_EVENT: - { - ret = cmd_enable_event(cmd_ctx->session, cmd_ctx->lsm->domain.type, - cmd_ctx->lsm->u.enable.channel_name, - &cmd_ctx->lsm->u.enable.event, kernel_poll_pipe[1]); - break; - } - case LTTNG_ENABLE_ALL_EVENT: - { - DBG("Enabling all events"); - ret = cmd_enable_event_all(cmd_ctx->session, cmd_ctx->lsm->domain.type, + ret = cmd_enable_event(cmd_ctx->session, &cmd_ctx->lsm->domain, cmd_ctx->lsm->u.enable.channel_name, - cmd_ctx->lsm->u.enable.event.type, kernel_poll_pipe[1]); + &cmd_ctx->lsm->u.enable.event, + filter_expression, bytecode, exclusion, + kernel_poll_pipe[1]); break; } case LTTNG_LIST_TRACEPOINTS: @@ -2359,8 +3461,11 @@ skip_domain: struct lttng_event *events; ssize_t nb_events; + session_lock_list(); nb_events = cmd_list_tracepoints(cmd_ctx->lsm->domain.type, &events); + session_unlock_list(); if (nb_events < 0) { + /* Return value is a negative lttng_error_code. */ ret = -nb_events; goto error; } @@ -2381,7 +3486,7 @@ skip_domain: free(events); - ret = LTTCOMM_OK; + ret = LTTNG_OK; break; } case LTTNG_LIST_TRACEPOINT_FIELDS: @@ -2389,9 +3494,12 @@ skip_domain: struct lttng_event_field *fields; ssize_t nb_fields; + session_lock_list(); nb_fields = cmd_list_tracepoint_fields(cmd_ctx->lsm->domain.type, &fields); + session_unlock_list(); if (nb_fields < 0) { + /* Return value is a negative lttng_error_code. */ ret = -nb_fields; goto error; } @@ -2413,7 +3521,70 @@ skip_domain: free(fields); - ret = LTTCOMM_OK; + ret = LTTNG_OK; + break; + } + case LTTNG_LIST_SYSCALLS: + { + struct lttng_event *events; + ssize_t nb_events; + + nb_events = cmd_list_syscalls(&events); + if (nb_events < 0) { + /* Return value is a negative lttng_error_code. */ + ret = -nb_events; + goto error; + } + + /* + * Setup lttng message with payload size set to the event list size in + * bytes and then copy list into the llm payload. + */ + ret = setup_lttng_msg(cmd_ctx, sizeof(struct lttng_event) * nb_events); + if (ret < 0) { + free(events); + goto setup_error; + } + + /* Copy event list into message payload */ + memcpy(cmd_ctx->llm->payload, events, + sizeof(struct lttng_event) * nb_events); + + free(events); + + ret = LTTNG_OK; + break; + } + case LTTNG_LIST_TRACKER_PIDS: + { + int32_t *pids = NULL; + ssize_t nr_pids; + + nr_pids = cmd_list_tracker_pids(cmd_ctx->session, + cmd_ctx->lsm->domain.type, &pids); + if (nr_pids < 0) { + /* Return value is a negative lttng_error_code. */ + ret = -nr_pids; + goto error; + } + + /* + * Setup lttng message with payload size set to the event list size in + * bytes and then copy list into the llm payload. + */ + ret = setup_lttng_msg(cmd_ctx, sizeof(int32_t) * nr_pids); + if (ret < 0) { + free(pids); + goto setup_error; + } + + /* Copy event list into message payload */ + memcpy(cmd_ctx->llm->payload, pids, + sizeof(int) * nr_pids); + + free(pids); + + ret = LTTNG_OK; break; } case LTTNG_SET_CONSUMER_URI: @@ -2425,13 +3596,13 @@ skip_domain: len = nb_uri * sizeof(struct lttng_uri); if (nb_uri == 0) { - ret = LTTCOMM_INVALID; + ret = LTTNG_ERR_INVALID; goto error; } uris = zmalloc(len); if (uris == NULL) { - ret = LTTCOMM_FATAL; + ret = LTTNG_ERR_FATAL; goto error; } @@ -2441,32 +3612,17 @@ skip_domain: if (ret <= 0) { DBG("No URIs received from client... continuing"); *sock_error = 1; - ret = LTTCOMM_SESSION_FAIL; + ret = LTTNG_ERR_SESSION_FAIL; + free(uris); goto error; } - ret = cmd_set_consumer_uri(cmd_ctx->lsm->domain.type, cmd_ctx->session, - nb_uri, uris); - if (ret != LTTCOMM_OK) { + ret = cmd_set_consumer_uri(cmd_ctx->session, nb_uri, uris); + free(uris); + if (ret != LTTNG_OK) { goto error; } - /* - * XXX: 0 means that this URI should be applied on the session. Should - * be a DOMAIN enuam. - */ - if (cmd_ctx->lsm->domain.type == 0) { - /* Add the URI for the UST session if a consumer is present. */ - if (cmd_ctx->session->ust_session && - cmd_ctx->session->ust_session->consumer) { - ret = cmd_set_consumer_uri(LTTNG_DOMAIN_UST, cmd_ctx->session, - nb_uri, uris); - } else if (cmd_ctx->session->kernel_session && - cmd_ctx->session->kernel_session->consumer) { - ret = cmd_set_consumer_uri(LTTNG_DOMAIN_KERNEL, - cmd_ctx->session, nb_uri, uris); - } - } break; } @@ -2491,7 +3647,7 @@ skip_domain: if (nb_uri > 0) { uris = zmalloc(len); if (uris == NULL) { - ret = LTTCOMM_FATAL; + ret = LTTNG_ERR_FATAL; goto error; } @@ -2501,19 +3657,23 @@ skip_domain: if (ret <= 0) { DBG("No URIs received from client... continuing"); *sock_error = 1; - ret = LTTCOMM_SESSION_FAIL; + ret = LTTNG_ERR_SESSION_FAIL; + free(uris); goto error; } if (nb_uri == 1 && uris[0].dtype != LTTNG_DST_PATH) { DBG("Creating session with ONE network URI is a bad call"); - ret = LTTCOMM_SESSION_FAIL; + ret = LTTNG_ERR_SESSION_FAIL; + free(uris); goto error; } } ret = cmd_create_session_uri(cmd_ctx->lsm->session.name, uris, nb_uri, - &cmd_ctx->creds); + &cmd_ctx->creds, 0); + + free(uris); break; } @@ -2528,16 +3688,18 @@ skip_domain: case LTTNG_LIST_DOMAINS: { ssize_t nb_dom; - struct lttng_domain *domains; + struct lttng_domain *domains = NULL; nb_dom = cmd_list_domains(cmd_ctx->session, &domains); if (nb_dom < 0) { + /* Return value is a negative lttng_error_code. */ ret = -nb_dom; goto error; } ret = setup_lttng_msg(cmd_ctx, nb_dom * sizeof(struct lttng_domain)); if (ret < 0) { + free(domains); goto setup_error; } @@ -2547,23 +3709,25 @@ skip_domain: free(domains); - ret = LTTCOMM_OK; + ret = LTTNG_OK; break; } case LTTNG_LIST_CHANNELS: { int nb_chan; - struct lttng_channel *channels; + struct lttng_channel *channels = NULL; nb_chan = cmd_list_channels(cmd_ctx->lsm->domain.type, cmd_ctx->session, &channels); if (nb_chan < 0) { + /* Return value is a negative lttng_error_code. */ ret = -nb_chan; goto error; } ret = setup_lttng_msg(cmd_ctx, nb_chan * sizeof(struct lttng_channel)); if (ret < 0) { + free(channels); goto setup_error; } @@ -2573,7 +3737,7 @@ skip_domain: free(channels); - ret = LTTCOMM_OK; + ret = LTTNG_OK; break; } case LTTNG_LIST_EVENTS: @@ -2584,12 +3748,14 @@ skip_domain: nb_event = cmd_list_events(cmd_ctx->lsm->domain.type, cmd_ctx->session, cmd_ctx->lsm->u.list.channel_name, &events); if (nb_event < 0) { + /* Return value is a negative lttng_error_code. */ ret = -nb_event; goto error; } ret = setup_lttng_msg(cmd_ctx, nb_event * sizeof(struct lttng_event)); if (ret < 0) { + free(events); goto setup_error; } @@ -2599,7 +3765,7 @@ skip_domain: free(events); - ret = LTTCOMM_OK; + ret = LTTNG_OK; break; } case LTTNG_LIST_SESSIONS: @@ -2624,7 +3790,7 @@ skip_domain: session_unlock_list(); - ret = LTTCOMM_OK; + ret = LTTNG_OK; break; } case LTTNG_CALIBRATE: @@ -2642,7 +3808,7 @@ skip_domain: cdata = &kconsumer_data; break; default: - ret = LTTCOMM_UND; + ret = LTTNG_ERR_UND; goto error; } @@ -2650,45 +3816,192 @@ skip_domain: cmd_ctx->lsm->u.reg.path, cdata); break; } - case LTTNG_SET_FILTER: + case LTTNG_DATA_PENDING: { - struct lttng_filter_bytecode *bytecode; + int pending_ret; - if (cmd_ctx->lsm->u.filter.bytecode_len > 65336) { - ret = LTTCOMM_FILTER_INVAL; - goto error; + /* 1 byte to return whether or not data is pending */ + ret = setup_lttng_msg(cmd_ctx, 1); + if (ret < 0) { + goto setup_error; } - bytecode = zmalloc(cmd_ctx->lsm->u.filter.bytecode_len); - if (!bytecode) { - ret = LTTCOMM_FILTER_NOMEM; - goto error; + + pending_ret = cmd_data_pending(cmd_ctx->session); + /* + * FIXME + * + * This function may returns 0 or 1 to indicate whether or not + * there is data pending. In case of error, it should return an + * LTTNG_ERR code. However, some code paths may still return + * a nondescript error code, which we handle by returning an + * "unknown" error. + */ + if (pending_ret == 0 || pending_ret == 1) { + ret = LTTNG_OK; + } else if (pending_ret < 0) { + ret = LTTNG_ERR_UNK; + goto setup_error; + } else { + ret = pending_ret; + goto setup_error; } - /* Receive var. len. data */ - DBG("Receiving var len data from client ..."); - ret = lttcomm_recv_unix_sock(sock, bytecode, - cmd_ctx->lsm->u.filter.bytecode_len); - if (ret <= 0) { - DBG("Nothing recv() from client var len data... continuing"); - *sock_error = 1; - ret = LTTCOMM_FILTER_INVAL; + + *cmd_ctx->llm->payload = (uint8_t) pending_ret; + break; + } + case LTTNG_SNAPSHOT_ADD_OUTPUT: + { + struct lttcomm_lttng_output_id reply; + + ret = cmd_snapshot_add_output(cmd_ctx->session, + &cmd_ctx->lsm->u.snapshot_output.output, &reply.id); + if (ret != LTTNG_OK) { goto error; } - if (bytecode->len + sizeof(*bytecode) - != cmd_ctx->lsm->u.filter.bytecode_len) { - free(bytecode); - ret = LTTCOMM_FILTER_INVAL; + ret = setup_lttng_msg(cmd_ctx, sizeof(reply)); + if (ret < 0) { + goto setup_error; + } + + /* Copy output list into message payload */ + memcpy(cmd_ctx->llm->payload, &reply, sizeof(reply)); + ret = LTTNG_OK; + break; + } + case LTTNG_SNAPSHOT_DEL_OUTPUT: + { + ret = cmd_snapshot_del_output(cmd_ctx->session, + &cmd_ctx->lsm->u.snapshot_output.output); + break; + } + case LTTNG_SNAPSHOT_LIST_OUTPUT: + { + ssize_t nb_output; + struct lttng_snapshot_output *outputs = NULL; + + nb_output = cmd_snapshot_list_outputs(cmd_ctx->session, &outputs); + if (nb_output < 0) { + ret = -nb_output; goto error; } - ret = cmd_set_filter(cmd_ctx->session, cmd_ctx->lsm->domain.type, - cmd_ctx->lsm->u.filter.channel_name, - cmd_ctx->lsm->u.filter.event_name, - bytecode); + ret = setup_lttng_msg(cmd_ctx, + nb_output * sizeof(struct lttng_snapshot_output)); + if (ret < 0) { + free(outputs); + goto setup_error; + } + + if (outputs) { + /* Copy output list into message payload */ + memcpy(cmd_ctx->llm->payload, outputs, + nb_output * sizeof(struct lttng_snapshot_output)); + free(outputs); + } + + ret = LTTNG_OK; + break; + } + case LTTNG_SNAPSHOT_RECORD: + { + ret = cmd_snapshot_record(cmd_ctx->session, + &cmd_ctx->lsm->u.snapshot_record.output, + cmd_ctx->lsm->u.snapshot_record.wait); + break; + } + case LTTNG_CREATE_SESSION_SNAPSHOT: + { + size_t nb_uri, len; + struct lttng_uri *uris = NULL; + + nb_uri = cmd_ctx->lsm->u.uri.size; + len = nb_uri * sizeof(struct lttng_uri); + + if (nb_uri > 0) { + uris = zmalloc(len); + if (uris == NULL) { + ret = LTTNG_ERR_FATAL; + goto error; + } + + /* Receive variable len data */ + DBG("Waiting for %zu URIs from client ...", nb_uri); + ret = lttcomm_recv_unix_sock(sock, uris, len); + if (ret <= 0) { + DBG("No URIs received from client... continuing"); + *sock_error = 1; + ret = LTTNG_ERR_SESSION_FAIL; + free(uris); + goto error; + } + + if (nb_uri == 1 && uris[0].dtype != LTTNG_DST_PATH) { + DBG("Creating session with ONE network URI is a bad call"); + ret = LTTNG_ERR_SESSION_FAIL; + free(uris); + goto error; + } + } + + ret = cmd_create_session_snapshot(cmd_ctx->lsm->session.name, uris, + nb_uri, &cmd_ctx->creds); + free(uris); + break; + } + case LTTNG_CREATE_SESSION_LIVE: + { + size_t nb_uri, len; + struct lttng_uri *uris = NULL; + + nb_uri = cmd_ctx->lsm->u.uri.size; + len = nb_uri * sizeof(struct lttng_uri); + + if (nb_uri > 0) { + uris = zmalloc(len); + if (uris == NULL) { + ret = LTTNG_ERR_FATAL; + goto error; + } + + /* Receive variable len data */ + DBG("Waiting for %zu URIs from client ...", nb_uri); + ret = lttcomm_recv_unix_sock(sock, uris, len); + if (ret <= 0) { + DBG("No URIs received from client... continuing"); + *sock_error = 1; + ret = LTTNG_ERR_SESSION_FAIL; + free(uris); + goto error; + } + + if (nb_uri == 1 && uris[0].dtype != LTTNG_DST_PATH) { + DBG("Creating session with ONE network URI is a bad call"); + ret = LTTNG_ERR_SESSION_FAIL; + free(uris); + goto error; + } + } + + ret = cmd_create_session_uri(cmd_ctx->lsm->session.name, uris, + nb_uri, &cmd_ctx->creds, cmd_ctx->lsm->u.session_live.timer_interval); + free(uris); + break; + } + case LTTNG_SAVE_SESSION: + { + ret = cmd_save_sessions(&cmd_ctx->lsm->u.save_session.attr, + &cmd_ctx->creds); + break; + } + case LTTNG_SET_SESSION_SHM_PATH: + { + ret = cmd_set_session_shm_path(cmd_ctx->session, + cmd_ctx->lsm->u.set_shm_path.shm_path); break; } default: - ret = LTTCOMM_UND; + ret = LTTNG_ERR_UND; break; } @@ -2720,13 +4033,16 @@ static void *thread_manage_health(void *data) int sock = -1, new_sock = -1, ret, i, pollfd, err = -1; uint32_t revents, nb_fd; struct lttng_poll_event events; - struct lttcomm_health_msg msg; - struct lttcomm_health_data reply; + struct health_comm_msg msg; + struct health_comm_reply reply; DBG("[thread] Manage health check started"); rcu_register_thread(); + /* We might hit an error path before this is created. */ + lttng_poll_init(&events); + /* Create unix socket */ sock = lttcomm_create_unix_sock(health_unix_sock_path); if (sock < 0) { @@ -2735,6 +4051,33 @@ static void *thread_manage_health(void *data) goto error; } + if (is_root) { + /* lttng health client socket path permissions */ + ret = chown(health_unix_sock_path, 0, + utils_get_group_id(tracing_group_name)); + if (ret < 0) { + ERR("Unable to set group on %s", health_unix_sock_path); + PERROR("chown"); + ret = -1; + goto error; + } + + ret = chmod(health_unix_sock_path, + S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); + if (ret < 0) { + ERR("Unable to set permissions on %s", health_unix_sock_path); + PERROR("chmod"); + ret = -1; + goto error; + } + } + + /* + * Set the CLOEXEC flag. Return code is useless because either way, the + * show must go on. + */ + (void) utils_set_fd_cloexec(sock); + ret = lttcomm_listen_unix_sock(sock); if (ret < 0) { goto error; @@ -2744,7 +4087,7 @@ static void *thread_manage_health(void *data) * Pass 2 as size here for the thread quit pipe and client_sock. Nothing * more will be added to this poll set. */ - ret = create_thread_poll_set(&events, 2); + ret = sessiond_set_thread_pollset(&events, 2); if (ret < 0) { goto error; } @@ -2755,11 +4098,11 @@ static void *thread_manage_health(void *data) goto error; } + sessiond_notify_ready(); + while (1) { DBG("Health check ready"); - nb_fd = LTTNG_POLL_GETNB(&events); - /* Inifinite blocking call, waiting for transmission */ restart: ret = lttng_poll_wait(&events, -1); @@ -2773,13 +4116,20 @@ restart: goto error; } + nb_fd = ret; + for (i = 0; i < nb_fd; i++) { /* Fetch once the poll data */ revents = LTTNG_POLL_GETEV(&events, i); pollfd = LTTNG_POLL_GETFD(&events, i); + if (!revents) { + /* No activity for this FD (poll implementation). */ + continue; + } + /* Thread quit pipe has been closed. Killing thread. */ - ret = check_thread_quit_pipe(pollfd, revents); + ret = sessiond_check_thread_quit_pipe(pollfd, revents); if (ret) { err = 0; goto exit; @@ -2799,6 +4149,12 @@ restart: goto error; } + /* + * Set the CLOEXEC flag. Return code is useless because either way, the + * show must go on. + */ + (void) utils_set_fd_cloexec(new_sock); + DBG("Receiving data from client for health..."); ret = lttcomm_recv_unix_sock(new_sock, (void *)&msg, sizeof(msg)); if (ret <= 0) { @@ -2812,47 +4168,19 @@ restart: } rcu_thread_online(); - - switch (msg.component) { - case LTTNG_HEALTH_CMD: - reply.ret_code = health_check_state(&health_thread_cmd); - break; - case LTTNG_HEALTH_APP_MANAGE: - reply.ret_code = health_check_state(&health_thread_app_manage); - break; - case LTTNG_HEALTH_APP_REG: - reply.ret_code = health_check_state(&health_thread_app_reg); - break; - case LTTNG_HEALTH_KERNEL: - reply.ret_code = health_check_state(&health_thread_kernel); - break; - case LTTNG_HEALTH_CONSUMER: - reply.ret_code = check_consumer_health(); - break; - case LTTNG_HEALTH_ALL: - reply.ret_code = - health_check_state(&health_thread_app_manage) && - health_check_state(&health_thread_app_reg) && - health_check_state(&health_thread_cmd) && - health_check_state(&health_thread_kernel) && - check_consumer_health(); - break; - default: - reply.ret_code = LTTCOMM_UND; - break; - } - - /* - * Flip ret value since 0 is a success and 1 indicates a bad health for - * the client where in the sessiond it is the opposite. Again, this is - * just to make things easier for us poor developer which enjoy a lot - * lazyness. - */ - if (reply.ret_code == 0 || reply.ret_code == 1) { - reply.ret_code = !reply.ret_code; + + memset(&reply, 0, sizeof(reply)); + for (i = 0; i < NR_HEALTH_SESSIOND_TYPES; i++) { + /* + * health_check_state returns 0 if health is + * bad. + */ + if (!health_check_state(health_sessiond, i)) { + reply.ret_code |= 1ULL << i; + } } - DBG2("Health check return value %d", reply.ret_code); + DBG2("Health check return value %" PRIx64, reply.ret_code); ret = send_unix_sock(new_sock, (void *) &reply, sizeof(reply)); if (ret < 0) { @@ -2880,12 +4208,6 @@ error: PERROR("close"); } } - if (new_sock >= 0) { - ret = close(new_sock); - if (ret) { - PERROR("close"); - } - } lttng_poll_clean(&events); @@ -2909,20 +4231,22 @@ static void *thread_manage_clients(void *data) rcu_register_thread(); - health_code_update(&health_thread_cmd); + health_register(health_sessiond, HEALTH_SESSIOND_TYPE_CMD); + + health_code_update(); ret = lttcomm_listen_unix_sock(client_sock); if (ret < 0) { - goto error; + goto error_listen; } /* * Pass 2 as size here for the thread quit pipe and client_sock. Nothing * more will be added to this poll set. */ - ret = create_thread_poll_set(&events, 2); + ret = sessiond_set_thread_pollset(&events, 2); if (ret < 0) { - goto error; + goto error_create_poll; } /* Add the application registration socket */ @@ -2931,25 +4255,32 @@ static void *thread_manage_clients(void *data) goto error; } - /* - * Notify parent pid that we are ready to accept command for client side. - */ - if (opt_sig_parent) { - kill(ppid, SIGUSR1); + sessiond_notify_ready(); + ret = sem_post(&load_info->message_thread_ready); + if (ret) { + PERROR("sem_post message_thread_ready"); + goto error; + } + + /* This testpoint is after we signal readiness to the parent. */ + if (testpoint(sessiond_thread_manage_clients)) { + goto error; + } + + if (testpoint(sessiond_thread_manage_clients_before_loop)) { + goto error; } - health_code_update(&health_thread_cmd); + health_code_update(); while (1) { DBG("Accepting client command ..."); - nb_fd = LTTNG_POLL_GETNB(&events); - /* Inifinite blocking call, waiting for transmission */ restart: - health_poll_update(&health_thread_cmd); + health_poll_entry(); ret = lttng_poll_wait(&events, -1); - health_poll_update(&health_thread_cmd); + health_poll_exit(); if (ret < 0) { /* * Restart interrupted system call. @@ -2960,15 +4291,22 @@ static void *thread_manage_clients(void *data) goto error; } + nb_fd = ret; + for (i = 0; i < nb_fd; i++) { /* Fetch once the poll data */ revents = LTTNG_POLL_GETEV(&events, i); pollfd = LTTNG_POLL_GETFD(&events, i); - health_code_update(&health_thread_cmd); + health_code_update(); + + if (!revents) { + /* No activity for this FD (poll implementation). */ + continue; + } /* Thread quit pipe has been closed. Killing thread. */ - ret = check_thread_quit_pipe(pollfd, revents); + ret = sessiond_check_thread_quit_pipe(pollfd, revents); if (ret) { err = 0; goto exit; @@ -2985,13 +4323,19 @@ static void *thread_manage_clients(void *data) DBG("Wait for client response"); - health_code_update(&health_thread_cmd); + health_code_update(); sock = lttcomm_accept_unix_sock(client_sock); if (sock < 0) { goto error; } + /* + * Set the CLOEXEC flag. Return code is useless because either way, the + * show must go on. + */ + (void) utils_set_fd_cloexec(sock); + /* Set socket option for credentials retrieval */ ret = lttcomm_setsockopt_creds_unix_sock(sock); if (ret < 0) { @@ -3015,7 +4359,7 @@ static void *thread_manage_clients(void *data) cmd_ctx->llm = NULL; cmd_ctx->session = NULL; - health_code_update(&health_thread_cmd); + health_code_update(); /* * Data is received from the lttng client. The struct @@ -3036,7 +4380,7 @@ static void *thread_manage_clients(void *data) continue; } - health_code_update(&health_thread_cmd); + health_code_update(); // TODO: Validate cmd_ctx including sanity check for // security purpose. @@ -3051,13 +4395,11 @@ static void *thread_manage_clients(void *data) ret = process_client_msg(cmd_ctx, sock, &sock_error); rcu_thread_offline(); if (ret < 0) { - if (sock_error) { - ret = close(sock); - if (ret) { - PERROR("close"); - } - sock = -1; + ret = close(sock); + if (ret) { + PERROR("close"); } + sock = -1; /* * TODO: Inform client somehow of the fatal error. At * this point, ret < 0 means that a zmalloc failed @@ -3069,11 +4411,12 @@ static void *thread_manage_clients(void *data) continue; } - health_code_update(&health_thread_cmd); + health_code_update(); - DBG("Sending response (size: %d, retcode: %s)", + DBG("Sending response (size: %d, retcode: %s (%d))", cmd_ctx->lttng_msg_size, - lttng_strerror(-cmd_ctx->llm->ret_code)); + lttng_strerror(-cmd_ctx->llm->ret_code), + cmd_ctx->llm->ret_code); ret = send_unix_sock(sock, cmd_ctx->llm, cmd_ctx->lttng_msg_size); if (ret < 0) { ERR("Failed to send data back to client"); @@ -3088,18 +4431,23 @@ static void *thread_manage_clients(void *data) clean_command_ctx(&cmd_ctx); - health_code_update(&health_thread_cmd); + health_code_update(); } exit: error: - if (err) { - health_error(&health_thread_cmd); - ERR("Health error occurred in %s", __func__); + if (sock >= 0) { + ret = close(sock); + if (ret) { + PERROR("close"); + } } - health_exit(&health_thread_cmd); - DBG("Client thread dying"); + lttng_poll_clean(&events); + clean_command_ctx(&cmd_ctx); + +error_listen: +error_create_poll: unlink(client_unix_sock_path); if (client_sock >= 0) { ret = close(client_sock); @@ -3107,17 +4455,39 @@ error: PERROR("close"); } } - if (sock >= 0) { - ret = close(sock); - if (ret) { - PERROR("close"); - } + + if (err) { + health_error(); + ERR("Health error occurred in %s", __func__); } - lttng_poll_clean(&events); - clean_command_ctx(&cmd_ctx); + health_unregister(health_sessiond); + + DBG("Client thread dying"); rcu_unregister_thread(); + + /* + * Since we are creating the consumer threads, we own them, so we need + * to join them before our thread exits. + */ + ret = join_consumer_thread(&kconsumer_data); + if (ret) { + errno = ret; + PERROR("join_consumer"); + } + + ret = join_consumer_thread(&ustconsumer32_data); + if (ret) { + errno = ret; + PERROR("join_consumer ust32"); + } + + ret = join_consumer_thread(&ustconsumer64_data); + if (ret) { + errno = ret; + PERROR("join_consumer ust64"); + } return NULL; } @@ -3142,134 +4512,454 @@ static void usage(void) fprintf(stderr, " --consumerd64-path PATH Specify path for the 64-bit UST consumer daemon binary\n"); fprintf(stderr, " --consumerd64-libdir PATH Specify path for the 64-bit UST consumer daemon libraries\n"); fprintf(stderr, " -d, --daemonize Start as a daemon.\n"); + fprintf(stderr, " -b, --background Start as a daemon, keeping console open.\n"); fprintf(stderr, " -g, --group NAME Specify the tracing group name. (default: tracing)\n"); fprintf(stderr, " -V, --version Show version number.\n"); - fprintf(stderr, " -S, --sig-parent Send SIGCHLD to parent pid to notify readiness.\n"); + fprintf(stderr, " -S, --sig-parent Send SIGUSR1 to parent pid to notify readiness.\n"); fprintf(stderr, " -q, --quiet No output at all.\n"); fprintf(stderr, " -v, --verbose Verbose mode. Activate DBG() macro.\n"); + fprintf(stderr, " -p, --pidfile FILE Write a pid to FILE name overriding the default value.\n"); fprintf(stderr, " --verbose-consumer Verbose mode for consumer. Activate DBG() macro.\n"); fprintf(stderr, " --no-kernel Disable kernel tracer\n"); + fprintf(stderr, " --agent-tcp-port Agent registration TCP port\n"); + fprintf(stderr, " -f --config PATH Load daemon configuration file\n"); + fprintf(stderr, " -l --load PATH Load session configuration\n"); + fprintf(stderr, " --kmod-probes Specify kernel module probes to load\n"); + fprintf(stderr, " --extra-kmod-probes Specify extra kernel module probes to load\n"); +} + +static int string_match(const char *str1, const char *str2) +{ + return (str1 && str2) && !strcmp(str1, str2); } /* - * daemon argument parsing + * Take an option from the getopt output and set it in the right variable to be + * used later. + * + * Return 0 on success else a negative value. */ -static int parse_args(int argc, char **argv) +static int set_option(int opt, const char *arg, const char *optname) { - int c; - - static struct option long_options[] = { - { "client-sock", 1, 0, 'c' }, - { "apps-sock", 1, 0, 'a' }, - { "kconsumerd-cmd-sock", 1, 0, 'C' }, - { "kconsumerd-err-sock", 1, 0, 'E' }, - { "ustconsumerd32-cmd-sock", 1, 0, 'G' }, - { "ustconsumerd32-err-sock", 1, 0, 'H' }, - { "ustconsumerd64-cmd-sock", 1, 0, 'D' }, - { "ustconsumerd64-err-sock", 1, 0, 'F' }, - { "consumerd32-path", 1, 0, 'u' }, - { "consumerd32-libdir", 1, 0, 'U' }, - { "consumerd64-path", 1, 0, 't' }, - { "consumerd64-libdir", 1, 0, 'T' }, - { "daemonize", 0, 0, 'd' }, - { "sig-parent", 0, 0, 'S' }, - { "help", 0, 0, 'h' }, - { "group", 1, 0, 'g' }, - { "version", 0, 0, 'V' }, - { "quiet", 0, 0, 'q' }, - { "verbose", 0, 0, 'v' }, - { "verbose-consumer", 0, 0, 'Z' }, - { "no-kernel", 0, 0, 'N' }, - { NULL, 0, 0, 0 } - }; + int ret = 0; + + if (arg && arg[0] == '\0') { + /* + * This only happens if the value is read from daemon config + * file. This means the option requires an argument and the + * configuration file contains a line such as: + * my_option = + */ + ret = -EINVAL; + goto end; + } + + if (string_match(optname, "client-sock") || opt == 'c') { + if (lttng_is_setuid_setgid()) { + WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.", + "-c, --client-sock"); + } else { + snprintf(client_unix_sock_path, PATH_MAX, "%s", arg); + } + } else if (string_match(optname, "apps-sock") || opt == 'a') { + if (lttng_is_setuid_setgid()) { + WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.", + "-a, --apps-sock"); + } else { + snprintf(apps_unix_sock_path, PATH_MAX, "%s", arg); + } + } else if (string_match(optname, "daemonize") || opt == 'd') { + opt_daemon = 1; + } else if (string_match(optname, "background") || opt == 'b') { + opt_background = 1; + } else if (string_match(optname, "group") || opt == 'g') { + if (lttng_is_setuid_setgid()) { + WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.", + "-g, --group"); + } else { + /* + * If the override option is set, the pointer points to a + * *non* const thus freeing it even though the variable type is + * set to const. + */ + if (tracing_group_name_override) { + free((void *) tracing_group_name); + } + tracing_group_name = strdup(arg); + if (!tracing_group_name) { + PERROR("strdup"); + ret = -ENOMEM; + } + tracing_group_name_override = 1; + } + } else if (string_match(optname, "help") || opt == 'h') { + usage(); + exit(EXIT_SUCCESS); + } else if (string_match(optname, "version") || opt == 'V') { + fprintf(stdout, "%s\n", VERSION); + exit(EXIT_SUCCESS); + } else if (string_match(optname, "sig-parent") || opt == 'S') { + opt_sig_parent = 1; + } else if (string_match(optname, "kconsumerd-err-sock")) { + if (lttng_is_setuid_setgid()) { + WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.", + "--kconsumerd-err-sock"); + } else { + snprintf(kconsumer_data.err_unix_sock_path, PATH_MAX, "%s", arg); + } + } else if (string_match(optname, "kconsumerd-cmd-sock")) { + if (lttng_is_setuid_setgid()) { + WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.", + "--kconsumerd-cmd-sock"); + } else { + snprintf(kconsumer_data.cmd_unix_sock_path, PATH_MAX, "%s", arg); + } + } else if (string_match(optname, "ustconsumerd64-err-sock")) { + if (lttng_is_setuid_setgid()) { + WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.", + "--ustconsumerd64-err-sock"); + } else { + snprintf(ustconsumer64_data.err_unix_sock_path, PATH_MAX, "%s", arg); + } + } else if (string_match(optname, "ustconsumerd64-cmd-sock")) { + if (lttng_is_setuid_setgid()) { + WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.", + "--ustconsumerd64-cmd-sock"); + } else { + snprintf(ustconsumer64_data.cmd_unix_sock_path, PATH_MAX, "%s", arg); + } + } else if (string_match(optname, "ustconsumerd32-err-sock")) { + if (lttng_is_setuid_setgid()) { + WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.", + "--ustconsumerd32-err-sock"); + } else { + snprintf(ustconsumer32_data.err_unix_sock_path, PATH_MAX, "%s", arg); + } + } else if (string_match(optname, "ustconsumerd32-cmd-sock")) { + if (lttng_is_setuid_setgid()) { + WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.", + "--ustconsumerd32-cmd-sock"); + } else { + snprintf(ustconsumer32_data.cmd_unix_sock_path, PATH_MAX, "%s", arg); + } + } else if (string_match(optname, "no-kernel")) { + opt_no_kernel = 1; + } else if (string_match(optname, "quiet") || opt == 'q') { + lttng_opt_quiet = 1; + } else if (string_match(optname, "verbose") || opt == 'v') { + /* Verbose level can increase using multiple -v */ + if (arg) { + /* Value obtained from config file */ + lttng_opt_verbose = config_parse_value(arg); + } else { + /* -v used on command line */ + lttng_opt_verbose++; + } + /* Clamp value to [0, 3] */ + lttng_opt_verbose = lttng_opt_verbose < 0 ? 0 : + (lttng_opt_verbose <= 3 ? lttng_opt_verbose : 3); + } else if (string_match(optname, "verbose-consumer")) { + if (arg) { + opt_verbose_consumer = config_parse_value(arg); + } else { + opt_verbose_consumer += 1; + } + } else if (string_match(optname, "consumerd32-path")) { + if (lttng_is_setuid_setgid()) { + WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.", + "--consumerd32-path"); + } else { + if (consumerd32_bin_override) { + free((void *) consumerd32_bin); + } + consumerd32_bin = strdup(arg); + if (!consumerd32_bin) { + PERROR("strdup"); + ret = -ENOMEM; + } + consumerd32_bin_override = 1; + } + } else if (string_match(optname, "consumerd32-libdir")) { + if (lttng_is_setuid_setgid()) { + WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.", + "--consumerd32-libdir"); + } else { + if (consumerd32_libdir_override) { + free((void *) consumerd32_libdir); + } + consumerd32_libdir = strdup(arg); + if (!consumerd32_libdir) { + PERROR("strdup"); + ret = -ENOMEM; + } + consumerd32_libdir_override = 1; + } + } else if (string_match(optname, "consumerd64-path")) { + if (lttng_is_setuid_setgid()) { + WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.", + "--consumerd64-path"); + } else { + if (consumerd64_bin_override) { + free((void *) consumerd64_bin); + } + consumerd64_bin = strdup(arg); + if (!consumerd64_bin) { + PERROR("strdup"); + ret = -ENOMEM; + } + consumerd64_bin_override = 1; + } + } else if (string_match(optname, "consumerd64-libdir")) { + if (lttng_is_setuid_setgid()) { + WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.", + "--consumerd64-libdir"); + } else { + if (consumerd64_libdir_override) { + free((void *) consumerd64_libdir); + } + consumerd64_libdir = strdup(arg); + if (!consumerd64_libdir) { + PERROR("strdup"); + ret = -ENOMEM; + } + consumerd64_libdir_override = 1; + } + } else if (string_match(optname, "pidfile") || opt == 'p') { + if (lttng_is_setuid_setgid()) { + WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.", + "-p, --pidfile"); + } else { + free(opt_pidfile); + opt_pidfile = strdup(arg); + if (!opt_pidfile) { + PERROR("strdup"); + ret = -ENOMEM; + } + } + } else if (string_match(optname, "agent-tcp-port")) { + if (lttng_is_setuid_setgid()) { + WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.", + "--agent-tcp-port"); + } else { + unsigned long v; + + if (!arg) { + ret = -EINVAL; + goto end; + } + errno = 0; + v = strtoul(arg, NULL, 0); + if (errno != 0 || !isdigit(arg[0])) { + ERR("Wrong value in --agent-tcp-port parameter: %s", arg); + return -1; + } + if (v == 0 || v >= 65535) { + ERR("Port overflow in --agent-tcp-port parameter: %s", arg); + return -1; + } + agent_tcp_port = (uint32_t) v; + DBG3("Agent TCP port set to non default: %u", agent_tcp_port); + } + } else if (string_match(optname, "load") || opt == 'l') { + if (lttng_is_setuid_setgid()) { + WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.", + "-l, --load"); + } else { + free(opt_load_session_path); + opt_load_session_path = strdup(arg); + if (!opt_load_session_path) { + PERROR("strdup"); + ret = -ENOMEM; + } + } + } else if (string_match(optname, "kmod-probes")) { + if (lttng_is_setuid_setgid()) { + WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.", + "--kmod-probes"); + } else { + free(kmod_probes_list); + kmod_probes_list = strdup(arg); + if (!kmod_probes_list) { + PERROR("strdup"); + ret = -ENOMEM; + } + } + } else if (string_match(optname, "extra-kmod-probes")) { + if (lttng_is_setuid_setgid()) { + WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.", + "--extra-kmod-probes"); + } else { + free(kmod_extra_probes_list); + kmod_extra_probes_list = strdup(arg); + if (!kmod_extra_probes_list) { + PERROR("strdup"); + ret = -ENOMEM; + } + } + } else if (string_match(optname, "config") || opt == 'f') { + /* This is handled in set_options() thus silent skip. */ + goto end; + } else { + /* Unknown option or other error. + * Error is printed by getopt, just return */ + ret = -1; + } + +end: + if (ret == -EINVAL) { + const char *opt_name = "unknown"; + int i; + + for (i = 0; i < sizeof(long_options) / sizeof(struct option); + i++) { + if (opt == long_options[i].val) { + opt_name = long_options[i].name; + break; + } + } + + WARN("Invalid argument provided for option \"%s\", using default value.", + opt_name); + } + + return ret; +} + +/* + * config_entry_handler_cb used to handle options read from a config file. + * See config_entry_handler_cb comment in common/config/config.h for the + * return value conventions. + */ +static int config_entry_handler(const struct config_entry *entry, void *unused) +{ + int ret = 0, i; + + if (!entry || !entry->name || !entry->value) { + ret = -EINVAL; + goto end; + } + + /* Check if the option is to be ignored */ + for (i = 0; i < sizeof(config_ignore_options) / sizeof(char *); i++) { + if (!strcmp(entry->name, config_ignore_options[i])) { + goto end; + } + } + + for (i = 0; i < (sizeof(long_options) / sizeof(struct option)) - 1; + i++) { + + /* Ignore if not fully matched. */ + if (strcmp(entry->name, long_options[i].name)) { + continue; + } + + /* + * If the option takes no argument on the command line, we have to + * check if the value is "true". We support non-zero numeric values, + * true, on and yes. + */ + if (!long_options[i].has_arg) { + ret = config_parse_value(entry->value); + if (ret <= 0) { + if (ret) { + WARN("Invalid configuration value \"%s\" for option %s", + entry->value, entry->name); + } + /* False, skip boolean config option. */ + goto end; + } + } + ret = set_option(long_options[i].val, entry->value, entry->name); + goto end; + } + + WARN("Unrecognized option \"%s\" in daemon configuration file.", entry->name); + +end: + return ret; +} + +/* + * daemon configuration loading and argument parsing + */ +static int set_options(int argc, char **argv) +{ + int ret = 0, c = 0, option_index = 0; + int orig_optopt = optopt, orig_optind = optind; + char *optstring; + const char *config_path = NULL; + + optstring = utils_generate_optstring(long_options, + sizeof(long_options) / sizeof(struct option)); + if (!optstring) { + ret = -ENOMEM; + goto end; + } + + /* Check for the --config option */ + while ((c = getopt_long(argc, argv, optstring, long_options, + &option_index)) != -1) { + if (c == '?') { + ret = -EINVAL; + goto end; + } else if (c != 'f') { + /* if not equal to --config option. */ + continue; + } + + if (lttng_is_setuid_setgid()) { + WARN("Getting '%s' argument from setuid/setgid binary refused for security reasons.", + "-f, --config"); + } else { + config_path = utils_expand_path(optarg); + if (!config_path) { + ERR("Failed to resolve path: %s", optarg); + } + } + } + + ret = config_get_section_entries(config_path, config_section_name, + config_entry_handler, NULL); + if (ret) { + if (ret > 0) { + ERR("Invalid configuration option at line %i", ret); + ret = -1; + } + goto end; + } + + /* Reset getopt's global state */ + optopt = orig_optopt; + optind = orig_optind; while (1) { - int option_index = 0; - c = getopt_long(argc, argv, "dhqvVSN" "a:c:g:s:C:E:D:F:Z:u:t", - long_options, &option_index); + option_index = -1; + /* + * getopt_long() will not set option_index if it encounters a + * short option. + */ + c = getopt_long(argc, argv, optstring, long_options, + &option_index); if (c == -1) { break; } - switch (c) { - case 0: - fprintf(stderr, "option %s", long_options[option_index].name); - if (optarg) { - fprintf(stderr, " with arg %s\n", optarg); - } - break; - case 'c': - snprintf(client_unix_sock_path, PATH_MAX, "%s", optarg); - break; - case 'a': - snprintf(apps_unix_sock_path, PATH_MAX, "%s", optarg); - break; - case 'd': - opt_daemon = 1; - break; - case 'g': - opt_tracing_group = optarg; - break; - case 'h': - usage(); - exit(EXIT_FAILURE); - case 'V': - fprintf(stdout, "%s\n", VERSION); - exit(EXIT_SUCCESS); - case 'S': - opt_sig_parent = 1; - break; - case 'E': - snprintf(kconsumer_data.err_unix_sock_path, PATH_MAX, "%s", optarg); - break; - case 'C': - snprintf(kconsumer_data.cmd_unix_sock_path, PATH_MAX, "%s", optarg); - break; - case 'F': - snprintf(ustconsumer64_data.err_unix_sock_path, PATH_MAX, "%s", optarg); - break; - case 'D': - snprintf(ustconsumer64_data.cmd_unix_sock_path, PATH_MAX, "%s", optarg); - break; - case 'H': - snprintf(ustconsumer32_data.err_unix_sock_path, PATH_MAX, "%s", optarg); - break; - case 'G': - snprintf(ustconsumer32_data.cmd_unix_sock_path, PATH_MAX, "%s", optarg); - break; - case 'N': - opt_no_kernel = 1; - break; - case 'q': - lttng_opt_quiet = 1; - break; - case 'v': - /* Verbose level can increase using multiple -v */ - lttng_opt_verbose += 1; - break; - case 'Z': - opt_verbose_consumer += 1; - break; - case 'u': - consumerd32_bin= optarg; - break; - case 'U': - consumerd32_libdir = optarg; - break; - case 't': - consumerd64_bin = optarg; - break; - case 'T': - consumerd64_libdir = optarg; + /* + * Pass NULL as the long option name if popt left the index + * unset. + */ + ret = set_option(c, optarg, + option_index < 0 ? NULL : + long_options[option_index].name); + if (ret < 0) { break; - default: - /* Unknown option or other error. - * Error is printed by getopt, just return */ - return -1; } } - return 0; +end: + free(optstring); + return ret; } /* @@ -3292,6 +4982,14 @@ static int init_daemon_socket(void) goto end; } + /* Set the cloexec flag */ + ret = utils_set_fd_cloexec(client_sock); + if (ret < 0) { + ERR("Unable to set CLOEXEC flag to the client Unix socket (fd: %d). " + "Continuing but note that the consumer daemon will have a " + "reference to this socket on exec()", client_sock); + } + /* File permission MUST be 660 */ ret = chmod(client_unix_sock_path, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); if (ret < 0) { @@ -3308,6 +5006,14 @@ static int init_daemon_socket(void) goto end; } + /* Set the cloexec flag */ + ret = utils_set_fd_cloexec(apps_sock); + if (ret < 0) { + ERR("Unable to set CLOEXEC flag to the app Unix socket (fd: %d). " + "Continuing but note that the consumer daemon will have a " + "reference to this socket on exec()", apps_sock); + } + /* File permission MUST be 666 */ ret = chmod(apps_unix_sock_path, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); @@ -3317,6 +5023,9 @@ static int init_daemon_socket(void) goto end; } + DBG3("Session daemon client socket %d and application socket %d created", + client_sock, apps_sock); + end: umask(old_umask); return ret; @@ -3347,14 +5056,7 @@ static int set_permissions(char *rundir) int ret; gid_t gid; - ret = allowed_group(); - if (ret < 0) { - WARN("No tracing group detected"); - ret = 0; - goto end; - } - - gid = ret; + gid = utils_get_group_id(tracing_group_name); /* Set lttng run dir */ ret = chown(rundir, 0, gid); @@ -3363,8 +5065,12 @@ static int set_permissions(char *rundir) PERROR("chown"); } - /* Ensure tracing group can search the run dir */ - ret = chmod(rundir, S_IRWXU | S_IXGRP | S_IXOTH); + /* + * Ensure all applications and tracing group can search the run + * dir. Allow everyone to read the directory, since it does not + * buy us anything to hide its content. + */ + ret = chmod(rundir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); if (ret < 0) { ERR("Unable to set permissions on %s", rundir); PERROR("chmod"); @@ -3378,21 +5084,21 @@ static int set_permissions(char *rundir) } /* kconsumer error socket path */ - ret = chown(kconsumer_data.err_unix_sock_path, 0, gid); + ret = chown(kconsumer_data.err_unix_sock_path, 0, 0); if (ret < 0) { ERR("Unable to set group on %s", kconsumer_data.err_unix_sock_path); PERROR("chown"); } /* 64-bit ustconsumer error socket path */ - ret = chown(ustconsumer64_data.err_unix_sock_path, 0, gid); + ret = chown(ustconsumer64_data.err_unix_sock_path, 0, 0); if (ret < 0) { ERR("Unable to set group on %s", ustconsumer64_data.err_unix_sock_path); PERROR("chown"); } /* 32-bit ustconsumer compat32 error socket path */ - ret = chown(ustconsumer32_data.err_unix_sock_path, 0, gid); + ret = chown(ustconsumer32_data.err_unix_sock_path, 0, 0); if (ret < 0) { ERR("Unable to set group on %s", ustconsumer32_data.err_unix_sock_path); PERROR("chown"); @@ -3400,7 +5106,6 @@ static int set_permissions(char *rundir) DBG("All permissions are set"); -end: return ret; } @@ -3437,7 +5142,7 @@ static int set_consumer_sockets(struct consumer_data *consumer_data, int ret; char path[PATH_MAX]; - switch (consumer_data->type) { + switch (consumer_data->type) { case LTTNG_CONSUMER_KERNEL: snprintf(path, PATH_MAX, DEFAULT_KCONSUMERD_PATH, rundir); break; @@ -3455,7 +5160,7 @@ static int set_consumer_sockets(struct consumer_data *consumer_data, DBG2("Creating consumer directory: %s", path); - ret = mkdir(path, S_IRWXU); + ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP); if (ret < 0) { if (errno != EEXIST) { PERROR("mkdir"); @@ -3464,6 +5169,14 @@ static int set_consumer_sockets(struct consumer_data *consumer_data, } ret = -1; } + if (is_root) { + ret = chown(path, 0, utils_get_group_id(tracing_group_name)); + if (ret < 0) { + ERR("Unable to set group on %s", path); + PERROR("chown"); + goto error; + } + } /* Create the kconsumerd error unix socket */ consumer_data->err_sock = @@ -3474,6 +5187,16 @@ static int set_consumer_sockets(struct consumer_data *consumer_data, goto error; } + /* + * Set the CLOEXEC flag. Return code is useless because either way, the + * show must go on. + */ + ret = utils_set_fd_cloexec(consumer_data->err_sock); + if (ret < 0) { + PERROR("utils_set_fd_cloexec"); + /* continue anyway */ + } + /* File permission MUST be 660 */ ret = chmod(consumer_data->err_unix_sock_path, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); @@ -3507,6 +5230,9 @@ static void sighandler(int sig) DBG("SIGTERM caught"); stop_threads(); break; + case SIGUSR1: + CMM_STORE_SHARED(recv_child_signal, 1); + break; default: break; } @@ -3545,7 +5271,12 @@ static int set_signal_handler(void) return ret; } - DBG("Signal handler set for SIGTERM, SIGPIPE and SIGINT"); + if ((ret = sigaction(SIGUSR1, &sa, NULL)) < 0) { + PERROR("sigaction"); + return ret; + } + + DBG("Signal handler set for SIGTERM, SIGUSR1, SIGPIPE and SIGINT"); return ret; } @@ -3569,54 +5300,189 @@ static void set_ulimit(void) } } +/* + * Write pidfile using the rundir and opt_pidfile. + */ +static int write_pidfile(void) +{ + int ret; + char pidfile_path[PATH_MAX]; + + assert(rundir); + + if (opt_pidfile) { + strncpy(pidfile_path, opt_pidfile, sizeof(pidfile_path)); + } else { + /* Build pidfile path from rundir and opt_pidfile. */ + ret = snprintf(pidfile_path, sizeof(pidfile_path), "%s/" + DEFAULT_LTTNG_SESSIOND_PIDFILE, rundir); + if (ret < 0) { + PERROR("snprintf pidfile path"); + goto error; + } + } + + /* + * Create pid file in rundir. + */ + ret = utils_create_pid_file(getpid(), pidfile_path); +error: + return ret; +} + +/* + * Create lockfile using the rundir and return its fd. + */ +static int create_lockfile(void) +{ + int ret; + char lockfile_path[PATH_MAX]; + + ret = generate_lock_file_path(lockfile_path, sizeof(lockfile_path)); + if (ret < 0) { + goto error; + } + + ret = utils_create_lock_file(lockfile_path); +error: + return ret; +} + +/* + * Write agent TCP port using the rundir. + */ +static int write_agent_port(void) +{ + int ret; + char path[PATH_MAX]; + + assert(rundir); + + ret = snprintf(path, sizeof(path), "%s/" + DEFAULT_LTTNG_SESSIOND_AGENTPORT_FILE, rundir); + if (ret < 0) { + PERROR("snprintf agent port path"); + goto error; + } + + /* + * Create TCP agent port file in rundir. + */ + ret = utils_create_pid_file(agent_tcp_port, path); + +error: + return ret; +} + /* * main */ int main(int argc, char **argv) { - int ret = 0; + int ret = 0, retval = 0; void *status; - const char *home_path; + const char *home_path, *env_app_timeout; init_kernel_workarounds(); rcu_register_thread(); + if (set_signal_handler()) { + retval = -1; + goto exit_set_signal_handler; + } + setup_consumerd_path(); - /* Parse arguments */ + page_size = sysconf(_SC_PAGESIZE); + if (page_size < 0) { + PERROR("sysconf _SC_PAGESIZE"); + page_size = LONG_MAX; + WARN("Fallback page size to %ld", page_size); + } + + /* + * Parse arguments and load the daemon configuration file. + * + * We have an exit_options exit path to free memory reserved by + * set_options. This is needed because the rest of sessiond_cleanup() + * depends on ht_cleanup_thread, which depends on lttng_daemonize, which + * depends on set_options. + */ progname = argv[0]; - if ((ret = parse_args(argc, argv) < 0)) { - goto error; + if (set_options(argc, argv)) { + retval = -1; + goto exit_options; } /* Daemonize */ - if (opt_daemon) { + if (opt_daemon || opt_background) { int i; - /* - * fork - * child: setsid, close FD 0, 1, 2, chdir / - * parent: exit (if fork is successful) - */ - ret = daemon(0, 0); + ret = lttng_daemonize(&child_ppid, &recv_child_signal, + !opt_background); if (ret < 0) { - PERROR("daemon"); - goto error; + retval = -1; + goto exit_options; } + /* - * We are in the child. Make sure all other file - * descriptors are closed, in case we are called with - * more opened file descriptors than the standard ones. + * We are in the child. Make sure all other file descriptors are + * closed, in case we are called with more opened file + * descriptors than the standard ones. */ for (i = 3; i < sysconf(_SC_OPEN_MAX); i++) { (void) close(i); } } + /* + * Starting from here, we can create threads. This needs to be after + * lttng_daemonize due to RCU. + */ + + /* + * Initialize the health check subsystem. This call should set the + * appropriate time values. + */ + health_sessiond = health_app_create(NR_HEALTH_SESSIOND_TYPES); + if (!health_sessiond) { + PERROR("health_app_create error"); + retval = -1; + goto exit_health_sessiond_cleanup; + } + + if (init_ht_cleanup_quit_pipe()) { + retval = -1; + goto exit_ht_cleanup_quit_pipe; + } + + /* Setup the thread ht_cleanup communication pipe. */ + if (utils_create_pipe_cloexec(ht_cleanup_pipe)) { + retval = -1; + goto exit_ht_cleanup_pipe; + } + + /* Set up max poll set size */ + if (lttng_poll_set_max_size()) { + retval = -1; + goto exit_set_max_size; + } + + /* Create thread to clean up RCU hash tables */ + ret = pthread_create(&ht_cleanup_thread, NULL, + thread_ht_cleanup, (void *) NULL); + if (ret) { + errno = ret; + PERROR("pthread_create ht_cleanup"); + retval = -1; + goto exit_ht_cleanup; + } + /* Create thread quit pipe */ - if ((ret = init_thread_quit_pipe()) < 0) { - goto error; + if (init_thread_quit_pipe()) { + retval = -1; + goto exit_init_data; } /* Check if daemon is UID = 0 */ @@ -3624,51 +5490,80 @@ int main(int argc, char **argv) if (is_root) { rundir = strdup(DEFAULT_LTTNG_RUNDIR); + if (!rundir) { + retval = -1; + goto exit_init_data; + } /* Create global run dir with root access */ - ret = create_lttng_rundir(rundir); - if (ret < 0) { - goto error; + if (create_lttng_rundir(rundir)) { + retval = -1; + goto exit_init_data; } if (strlen(apps_unix_sock_path) == 0) { - snprintf(apps_unix_sock_path, PATH_MAX, + ret = snprintf(apps_unix_sock_path, PATH_MAX, DEFAULT_GLOBAL_APPS_UNIX_SOCK); + if (ret < 0) { + retval = -1; + goto exit_init_data; + } } if (strlen(client_unix_sock_path) == 0) { - snprintf(client_unix_sock_path, PATH_MAX, + ret = snprintf(client_unix_sock_path, PATH_MAX, DEFAULT_GLOBAL_CLIENT_UNIX_SOCK); + if (ret < 0) { + retval = -1; + goto exit_init_data; + } } /* Set global SHM for ust */ if (strlen(wait_shm_path) == 0) { - snprintf(wait_shm_path, PATH_MAX, + ret = snprintf(wait_shm_path, PATH_MAX, DEFAULT_GLOBAL_APPS_WAIT_SHM_PATH); + if (ret < 0) { + retval = -1; + goto exit_init_data; + } } if (strlen(health_unix_sock_path) == 0) { - snprintf(health_unix_sock_path, sizeof(health_unix_sock_path), + ret = snprintf(health_unix_sock_path, + sizeof(health_unix_sock_path), DEFAULT_GLOBAL_HEALTH_UNIX_SOCK); + if (ret < 0) { + retval = -1; + goto exit_init_data; + } } /* Setup kernel consumerd path */ - snprintf(kconsumer_data.err_unix_sock_path, PATH_MAX, + ret = snprintf(kconsumer_data.err_unix_sock_path, PATH_MAX, DEFAULT_KCONSUMERD_ERR_SOCK_PATH, rundir); - snprintf(kconsumer_data.cmd_unix_sock_path, PATH_MAX, + if (ret < 0) { + retval = -1; + goto exit_init_data; + } + ret = snprintf(kconsumer_data.cmd_unix_sock_path, PATH_MAX, DEFAULT_KCONSUMERD_CMD_SOCK_PATH, rundir); + if (ret < 0) { + retval = -1; + goto exit_init_data; + } DBG2("Kernel consumer err path: %s", kconsumer_data.err_unix_sock_path); DBG2("Kernel consumer cmd path: %s", kconsumer_data.cmd_unix_sock_path); } else { - home_path = get_home_dir(); + home_path = utils_get_home_dir(); if (home_path == NULL) { /* TODO: Add --socket PATH option */ ERR("Can't get HOME directory for sockets creation."); - ret = -EPERM; - goto error; + retval = -1; + goto exit_init_data; } /* @@ -3677,52 +5572,90 @@ int main(int argc, char **argv) */ ret = asprintf(&rundir, DEFAULT_LTTNG_HOME_RUNDIR, home_path); if (ret < 0) { - ret = -ENOMEM; - goto error; + retval = -1; + goto exit_init_data; } - ret = create_lttng_rundir(rundir); - if (ret < 0) { - goto error; + if (create_lttng_rundir(rundir)) { + retval = -1; + goto exit_init_data; } if (strlen(apps_unix_sock_path) == 0) { - snprintf(apps_unix_sock_path, PATH_MAX, - DEFAULT_HOME_APPS_UNIX_SOCK, home_path); + ret = snprintf(apps_unix_sock_path, PATH_MAX, + DEFAULT_HOME_APPS_UNIX_SOCK, + home_path); + if (ret < 0) { + retval = -1; + goto exit_init_data; + } } /* Set the cli tool unix socket path */ if (strlen(client_unix_sock_path) == 0) { - snprintf(client_unix_sock_path, PATH_MAX, - DEFAULT_HOME_CLIENT_UNIX_SOCK, home_path); + ret = snprintf(client_unix_sock_path, PATH_MAX, + DEFAULT_HOME_CLIENT_UNIX_SOCK, + home_path); + if (ret < 0) { + retval = -1; + goto exit_init_data; + } } /* Set global SHM for ust */ if (strlen(wait_shm_path) == 0) { - snprintf(wait_shm_path, PATH_MAX, - DEFAULT_HOME_APPS_WAIT_SHM_PATH, geteuid()); + ret = snprintf(wait_shm_path, PATH_MAX, + DEFAULT_HOME_APPS_WAIT_SHM_PATH, + getuid()); + if (ret < 0) { + retval = -1; + goto exit_init_data; + } } /* Set health check Unix path */ if (strlen(health_unix_sock_path) == 0) { - snprintf(health_unix_sock_path, sizeof(health_unix_sock_path), - DEFAULT_HOME_HEALTH_UNIX_SOCK, home_path); + ret = snprintf(health_unix_sock_path, + sizeof(health_unix_sock_path), + DEFAULT_HOME_HEALTH_UNIX_SOCK, + home_path); + if (ret < 0) { + retval = -1; + goto exit_init_data; + } } } + lockfile_fd = create_lockfile(); + if (lockfile_fd < 0) { + retval = -1; + goto exit_init_data; + } + /* Set consumer initial state */ kernel_consumerd_state = CONSUMER_STOPPED; ust_consumerd_state = CONSUMER_STOPPED; DBG("Client socket path %s", client_unix_sock_path); DBG("Application socket path %s", apps_unix_sock_path); + DBG("Application wait path %s", wait_shm_path); DBG("LTTng run directory path: %s", rundir); /* 32 bits consumerd path setup */ - snprintf(ustconsumer32_data.err_unix_sock_path, PATH_MAX, + ret = snprintf(ustconsumer32_data.err_unix_sock_path, PATH_MAX, DEFAULT_USTCONSUMERD32_ERR_SOCK_PATH, rundir); - snprintf(ustconsumer32_data.cmd_unix_sock_path, PATH_MAX, + if (ret < 0) { + PERROR("snprintf 32-bit consumer error socket path"); + retval = -1; + goto exit_init_data; + } + ret = snprintf(ustconsumer32_data.cmd_unix_sock_path, PATH_MAX, DEFAULT_USTCONSUMERD32_CMD_SOCK_PATH, rundir); + if (ret < 0) { + PERROR("snprintf 32-bit consumer command socket path"); + retval = -1; + goto exit_init_data; + } DBG2("UST consumer 32 bits err path: %s", ustconsumer32_data.err_unix_sock_path); @@ -3730,10 +5663,20 @@ int main(int argc, char **argv) ustconsumer32_data.cmd_unix_sock_path); /* 64 bits consumerd path setup */ - snprintf(ustconsumer64_data.err_unix_sock_path, PATH_MAX, + ret = snprintf(ustconsumer64_data.err_unix_sock_path, PATH_MAX, DEFAULT_USTCONSUMERD64_ERR_SOCK_PATH, rundir); - snprintf(ustconsumer64_data.cmd_unix_sock_path, PATH_MAX, + if (ret < 0) { + PERROR("snprintf 64-bit consumer error socket path"); + retval = -1; + goto exit_init_data; + } + ret = snprintf(ustconsumer64_data.cmd_unix_sock_path, PATH_MAX, DEFAULT_USTCONSUMERD64_CMD_SOCK_PATH, rundir); + if (ret < 0) { + PERROR("snprintf 64-bit consumer command socket path"); + retval = -1; + goto exit_init_data; + } DBG2("UST consumer 64 bits err path: %s", ustconsumer64_data.err_unix_sock_path); @@ -3743,22 +5686,35 @@ int main(int argc, char **argv) /* * See if daemon already exist. */ - if ((ret = check_existing_daemon()) < 0) { + if (check_existing_daemon()) { ERR("Already running daemon.\n"); /* * We do not goto exit because we must not cleanup() * because a daemon is already running. */ - goto error; + retval = -1; + goto exit_init_data; } /* * Init UST app hash table. Alloc hash table before this point since * cleanup() can get called after that point. */ - ust_app_ht_alloc(); + if (ust_app_ht_alloc()) { + ERR("Failed to allocate UST app hash table"); + retval = -1; + goto exit_init_data; + } - /* After this point, we can safely call cleanup() with "goto exit" */ + /* + * Initialize agent app hash table. We allocate the hash table here + * since cleanup() can get called after this point. + */ + if (agent_app_ht_alloc()) { + ERR("Failed to allocate Agent app hash table"); + retval = -1; + goto exit_init_data; + } /* * These actions must be executed as root. We do that *after* setting up @@ -3767,14 +5723,22 @@ int main(int argc, char **argv) * kernel tracer. */ if (is_root) { - ret = set_consumer_sockets(&kconsumer_data, rundir); - if (ret < 0) { - goto exit; + if (set_consumer_sockets(&kconsumer_data, rundir)) { + retval = -1; + goto exit_init_data; } /* Setup kernel tracer */ if (!opt_no_kernel) { init_kernel_tracer(); + if (kernel_tracer_fd >= 0) { + ret = syscall_init_table(); + if (ret < 0) { + ERR("Unable to populate syscall table. " + "Syscall tracing won't work " + "for this session daemon."); + } + } } /* Set ulimit for open files */ @@ -3783,28 +5747,26 @@ int main(int argc, char **argv) /* init lttng_fd tracking must be done after set_ulimit. */ lttng_fd_init(); - ret = set_consumer_sockets(&ustconsumer64_data, rundir); - if (ret < 0) { - goto exit; - } - - ret = set_consumer_sockets(&ustconsumer32_data, rundir); - if (ret < 0) { - goto exit; + if (set_consumer_sockets(&ustconsumer64_data, rundir)) { + retval = -1; + goto exit_init_data; } - if ((ret = set_signal_handler()) < 0) { - goto exit; + if (set_consumer_sockets(&ustconsumer32_data, rundir)) { + retval = -1; + goto exit_init_data; } /* Setup the needed unix socket */ - if ((ret = init_daemon_socket()) < 0) { - goto exit; + if (init_daemon_socket()) { + retval = -1; + goto exit_init_data; } /* Set credentials to socket */ - if (is_root && ((ret = set_permissions(rundir)) < 0)) { - goto exit; + if (is_root && set_permissions(rundir)) { + retval = -1; + goto exit_init_data; } /* Get parent pid if -S, --sig-parent is specified. */ @@ -3813,148 +5775,295 @@ int main(int argc, char **argv) } /* Setup the kernel pipe for waking up the kernel thread */ - if ((ret = utils_create_pipe_cloexec(kernel_poll_pipe)) < 0) { - goto exit; + if (is_root && !opt_no_kernel) { + if (utils_create_pipe_cloexec(kernel_poll_pipe)) { + retval = -1; + goto exit_init_data; + } } /* Setup the thread apps communication pipe. */ - if ((ret = utils_create_pipe_cloexec(apps_cmd_pipe)) < 0) { - goto exit; + if (utils_create_pipe_cloexec(apps_cmd_pipe)) { + retval = -1; + goto exit_init_data; } + /* Setup the thread apps notify communication pipe. */ + if (utils_create_pipe_cloexec(apps_cmd_notify_pipe)) { + retval = -1; + goto exit_init_data; + } + + /* Initialize global buffer per UID and PID registry. */ + buffer_reg_init_uid_registry(); + buffer_reg_init_pid_registry(); + /* Init UST command queue. */ - cds_wfq_init(&ust_cmd_queue.queue); + cds_wfcq_init(&ust_cmd_queue.head, &ust_cmd_queue.tail); /* - * Get session list pointer. This pointer MUST NOT be free(). This list is - * statically declared in session.c + * Get session list pointer. This pointer MUST NOT be free'd. This list + * is statically declared in session.c */ session_list_ptr = session_get_list(); - /* Set up max poll set size */ - lttng_poll_set_max_size(); - cmd_init(); - /* Init all health thread counters. */ - health_init(&health_thread_cmd); - health_init(&health_thread_kernel); - health_init(&health_thread_app_manage); - health_init(&health_thread_app_reg); + /* Check for the application socket timeout env variable. */ + env_app_timeout = getenv(DEFAULT_APP_SOCKET_TIMEOUT_ENV); + if (env_app_timeout) { + app_socket_timeout = atoi(env_app_timeout); + } else { + app_socket_timeout = DEFAULT_APP_SOCKET_RW_TIMEOUT; + } - /* - * Init health counters of the consumer thread. We do a quick hack here to - * the state of the consumer health is fine even if the thread is not - * started. This is simply to ease our life and has no cost what so ever. - */ - health_init(&kconsumer_data.health); - health_poll_update(&kconsumer_data.health); - health_init(&ustconsumer32_data.health); - health_poll_update(&ustconsumer32_data.health); - health_init(&ustconsumer64_data.health); - health_poll_update(&ustconsumer64_data.health); + ret = write_pidfile(); + if (ret) { + ERR("Error in write_pidfile"); + retval = -1; + goto exit_init_data; + } + ret = write_agent_port(); + if (ret) { + ERR("Error in write_agent_port"); + retval = -1; + goto exit_init_data; + } - /* Create thread to manage the client socket */ + /* Initialize communication library */ + lttcomm_init(); + /* Initialize TCP timeout values */ + lttcomm_inet_init(); + + if (load_session_init_data(&load_info) < 0) { + retval = -1; + goto exit_init_data; + } + load_info->path = opt_load_session_path; + + /* Create health-check thread */ ret = pthread_create(&health_thread, NULL, thread_manage_health, (void *) NULL); - if (ret != 0) { + if (ret) { + errno = ret; PERROR("pthread_create health"); + retval = -1; goto exit_health; } /* Create thread to manage the client socket */ ret = pthread_create(&client_thread, NULL, thread_manage_clients, (void *) NULL); - if (ret != 0) { + if (ret) { + errno = ret; PERROR("pthread_create clients"); + retval = -1; goto exit_client; } /* Create thread to dispatch registration */ ret = pthread_create(&dispatch_thread, NULL, thread_dispatch_ust_registration, (void *) NULL); - if (ret != 0) { + if (ret) { + errno = ret; PERROR("pthread_create dispatch"); + retval = -1; goto exit_dispatch; } /* Create thread to manage application registration. */ ret = pthread_create(®_apps_thread, NULL, thread_registration_apps, (void *) NULL); - if (ret != 0) { + if (ret) { + errno = ret; PERROR("pthread_create registration"); + retval = -1; goto exit_reg_apps; } /* Create thread to manage application socket */ ret = pthread_create(&apps_thread, NULL, thread_manage_apps, (void *) NULL); - if (ret != 0) { + if (ret) { + errno = ret; PERROR("pthread_create apps"); + retval = -1; goto exit_apps; } - /* Create kernel thread to manage kernel event */ - ret = pthread_create(&kernel_thread, NULL, - thread_manage_kernel, (void *) NULL); - if (ret != 0) { - PERROR("pthread_create kernel"); - goto exit_kernel; + /* Create thread to manage application notify socket */ + ret = pthread_create(&apps_notify_thread, NULL, + ust_thread_manage_notify, (void *) NULL); + if (ret) { + errno = ret; + PERROR("pthread_create notify"); + retval = -1; + goto exit_apps_notify; } - ret = pthread_join(kernel_thread, &status); - if (ret != 0) { - PERROR("pthread_join"); - goto error; /* join error, exit without cleanup */ + /* Create agent registration thread. */ + ret = pthread_create(&agent_reg_thread, NULL, + agent_thread_manage_registration, (void *) NULL); + if (ret) { + errno = ret; + PERROR("pthread_create agent"); + retval = -1; + goto exit_agent_reg; + } + + /* Don't start this thread if kernel tracing is not requested nor root */ + if (is_root && !opt_no_kernel) { + /* Create kernel thread to manage kernel event */ + ret = pthread_create(&kernel_thread, NULL, + thread_manage_kernel, (void *) NULL); + if (ret) { + errno = ret; + PERROR("pthread_create kernel"); + retval = -1; + goto exit_kernel; + } + } + + /* Create session loading thread. */ + ret = pthread_create(&load_session_thread, NULL, thread_load_session, + load_info); + if (ret) { + errno = ret; + PERROR("pthread_create load_session_thread"); + retval = -1; + goto exit_load_session; + } + + /* + * This is where we start awaiting program completion (e.g. through + * signal that asks threads to teardown). + */ + + ret = pthread_join(load_session_thread, &status); + if (ret) { + errno = ret; + PERROR("pthread_join load_session_thread"); + retval = -1; } +exit_load_session: + if (is_root && !opt_no_kernel) { + ret = pthread_join(kernel_thread, &status); + if (ret) { + errno = ret; + PERROR("pthread_join"); + retval = -1; + } + } exit_kernel: - ret = pthread_join(apps_thread, &status); - if (ret != 0) { - PERROR("pthread_join"); - goto error; /* join error, exit without cleanup */ + + ret = pthread_join(agent_reg_thread, &status); + if (ret) { + errno = ret; + PERROR("pthread_join agent"); + retval = -1; + } +exit_agent_reg: + + ret = pthread_join(apps_notify_thread, &status); + if (ret) { + errno = ret; + PERROR("pthread_join apps notify"); + retval = -1; } +exit_apps_notify: + ret = pthread_join(apps_thread, &status); + if (ret) { + errno = ret; + PERROR("pthread_join apps"); + retval = -1; + } exit_apps: + ret = pthread_join(reg_apps_thread, &status); - if (ret != 0) { + if (ret) { + errno = ret; PERROR("pthread_join"); - goto error; /* join error, exit without cleanup */ + retval = -1; } - exit_reg_apps: + + /* + * Join dispatch thread after joining reg_apps_thread to ensure + * we don't leak applications in the queue. + */ ret = pthread_join(dispatch_thread, &status); - if (ret != 0) { + if (ret) { + errno = ret; PERROR("pthread_join"); - goto error; /* join error, exit without cleanup */ + retval = -1; } - exit_dispatch: + ret = pthread_join(client_thread, &status); - if (ret != 0) { + if (ret) { + errno = ret; PERROR("pthread_join"); - goto error; /* join error, exit without cleanup */ + retval = -1; } +exit_client: - ret = join_consumer_thread(&kconsumer_data); - if (ret != 0) { - PERROR("join_consumer"); - goto error; /* join error, exit without cleanup */ + ret = pthread_join(health_thread, &status); + if (ret) { + errno = ret; + PERROR("pthread_join health thread"); + retval = -1; } - -exit_client: exit_health: -exit: + +exit_init_data: /* - * cleanup() is called when no other thread is running. + * sessiond_cleanup() is called when no other thread is running, except + * the ht_cleanup thread, which is needed to destroy the hash tables. */ rcu_thread_online(); - cleanup(); + sessiond_cleanup(); rcu_thread_offline(); rcu_unregister_thread(); - if (!ret) { + + ret = notify_thread_pipe(ht_cleanup_quit_pipe[1]); + if (ret < 0) { + ERR("write error on ht_cleanup quit pipe"); + retval = -1; + } + + ret = pthread_join(ht_cleanup_thread, &status); + if (ret) { + errno = ret; + PERROR("pthread_join ht cleanup thread"); + retval = -1; + } +exit_ht_cleanup: +exit_set_max_size: + + utils_close_pipe(ht_cleanup_pipe); +exit_ht_cleanup_pipe: + + /* + * Close the ht_cleanup quit pipe. + */ + utils_close_pipe(ht_cleanup_quit_pipe); +exit_ht_cleanup_quit_pipe: + + health_app_destroy(health_sessiond); +exit_health_sessiond_cleanup: + +exit_options: + sessiond_cleanup_options(); + +exit_set_signal_handler: + /* Ensure all prior call_rcu are done. */ + rcu_barrier(); + + if (!retval) { exit(EXIT_SUCCESS); + } else { + exit(EXIT_FAILURE); } -error: - exit(EXIT_FAILURE); }