X-Git-Url: https://git.lttng.org/?a=blobdiff_plain;f=src%2Fbin%2Flttng-sessiond%2Fmain.c;h=14dd1381ad036605f94fd75881b57b69693097e4;hb=f0601a08249eeed11e0f843d74984f4eb9c303c9;hp=4a6b68a381c5394993b802f598ffc33625cfac1b;hpb=56045742fca088c61bdd8126403b5c18b7837962;p=lttng-tools.git diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c index 4a6b68a38..14dd1381a 100644 --- a/src/bin/lttng-sessiond/main.c +++ b/src/bin/lttng-sessiond/main.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "lttng-sessiond.h" #include "buffer-registry.h" @@ -73,12 +74,12 @@ static const char *tracing_group_name = DEFAULT_TRACING_GROUP; static const char *opt_pidfile; static int opt_sig_parent; static int opt_verbose_consumer; -static int opt_daemon; +static int opt_daemon, opt_background; static int opt_no_kernel; -static int is_root; /* Set to 1 if the daemon is running as root */ static pid_t ppid; /* Parent PID for --sig-parent option */ static pid_t child_ppid; /* Internal parent PID use with daemonize. */ static char *rundir; +static int lockfile_fd = -1; /* Set to 1 when a SIGUSR1 signal is received. */ static int recv_child_signal; @@ -243,6 +244,9 @@ struct health_app *health_sessiond; /* JUL TCP port for registration. Used by the JUL thread. */ unsigned int jul_tcp_port = DEFAULT_JUL_TCP_PORT; +/* Am I root or not. */ +int is_root; /* Set to 1 if the daemon is running as root */ + /* * Whether sessiond is ready for commands/health check requests. * NR_LTTNG_SESSIOND_READY must match the number of calls to @@ -269,7 +273,7 @@ void lttng_sessiond_notify_ready(void) * Notify the parent of the fork() process that we are * ready. */ - if (opt_daemon) { + if (opt_daemon || opt_background) { kill(child_ppid, SIGUSR1); } } @@ -455,6 +459,27 @@ static void close_consumer_sockets(void) } } +/* + * Generate the full lock file path using the rundir. + * + * Return the snprintf() return value thus a negative value is an error. + */ +static int generate_lock_file_path(char *path, size_t len) +{ + int ret; + + assert(path); + assert(rundir); + + /* Build lockfile path from rundir. */ + ret = snprintf(path, len, "%s/" DEFAULT_LTTNG_SESSIOND_LOCKFILE, rundir); + if (ret < 0) { + PERROR("snprintf lockfile path"); + } + + return ret; +} + /* * Cleanup the daemon */ @@ -536,14 +561,6 @@ static void cleanup(void) DBG("Removing directory %s", path); (void) rmdir(path); - /* - * We do NOT rmdir rundir because there are other processes - * using it, for instance lttng-relayd, which can start in - * parallel with this teardown. - */ - - free(rundir); - DBG("Cleaning up all sessions"); /* Destroy session list mutex */ @@ -575,6 +592,35 @@ static void cleanup(void) close_consumer_sockets(); + + /* + * Cleanup lock file by deleting it and finaly closing it which will + * release the file system lock. + */ + if (lockfile_fd >= 0) { + char lockfile_path[PATH_MAX]; + + ret = generate_lock_file_path(lockfile_path, sizeof(lockfile_path)); + if (ret > 0) { + ret = remove(lockfile_path); + if (ret < 0) { + PERROR("remove lock file"); + } + ret = close(lockfile_fd); + if (ret < 0) { + PERROR("close lock file"); + } + } + } + + /* + * We do NOT rmdir rundir because there are other processes + * using it, for instance lttng-relayd, which can start in + * parallel with this teardown. + */ + + free(rundir); + /* */ DBG("%c[%d;%dm*** assert failed :-) *** ==> %c[%dm%c[%d;%dm" "Matthew, BEET driven development works!%c[%dm", @@ -1083,7 +1129,6 @@ restart: } health_code_update(); - if (code == LTTCOMM_CONSUMERD_COMMAND_SOCK_READY) { /* Connect both socket, command and metadata. */ consumer_data->cmd_sock = @@ -1240,13 +1285,13 @@ error: } consumer_data->cmd_sock = -1; } - if (*consumer_data->metadata_sock.fd_ptr >= 0) { + if (consumer_data->metadata_sock.fd_ptr && + *consumer_data->metadata_sock.fd_ptr >= 0) { ret = close(*consumer_data->metadata_sock.fd_ptr); if (ret) { PERROR("close"); } } - if (sock >= 0) { ret = close(sock); if (ret) { @@ -1260,9 +1305,10 @@ error: pthread_mutex_unlock(&consumer_data->lock); /* Cleanup metadata socket mutex. */ - pthread_mutex_destroy(consumer_data->metadata_sock.lock); - free(consumer_data->metadata_sock.lock); - + if (consumer_data->metadata_sock.lock) { + pthread_mutex_destroy(consumer_data->metadata_sock.lock); + free(consumer_data->metadata_sock.lock); + } lttng_poll_clean(&events); error_poll: if (err) { @@ -1563,6 +1609,10 @@ static void *thread_dispatch_ust_registration(void *data) health_register(health_sessiond, HEALTH_SESSIOND_TYPE_APP_REG_DISPATCH); + if (testpoint(sessiond_thread_app_reg_dispatch)) { + goto error_testpoint; + } + health_code_update(); CDS_INIT_LIST_HEAD(&wait_queue.head); @@ -1767,6 +1817,7 @@ error: free(wait_node); } +error_testpoint: DBG("Dispatch thread dying"); if (err) { health_error(); @@ -1955,11 +2006,6 @@ static void *thread_registration_apps(void *data) exit: error: - if (err) { - health_error(); - ERR("Health error occurred in %s", __func__); - } - /* Notify that the registration thread is gone */ notify_ust_apps(0); @@ -1984,6 +2030,10 @@ error_listen: error_create_poll: error_testpoint: DBG("UST Registration thread cleanup complete"); + if (err) { + health_error(); + ERR("Health error occurred in %s", __func__); + } health_unregister(health_sessiond); return NULL; @@ -2080,19 +2130,23 @@ static int spawn_consumer_thread(struct consumer_data *consumer_data) if (ret != 0) { errno = ret; if (ret == ETIMEDOUT) { + int pth_ret; + /* * Call has timed out so we kill the kconsumerd_thread and return * an error. */ ERR("Condition timed out. The consumer thread was never ready." " Killing it"); - ret = pthread_cancel(consumer_data->thread); - if (ret < 0) { + pth_ret = pthread_cancel(consumer_data->thread); + if (pth_ret < 0) { PERROR("pthread_cancel consumer thread"); } } else { PERROR("pthread_cond_wait failed consumer thread"); } + /* Caller is expecting a negative value on failure. */ + ret = -1; goto error; } @@ -2178,10 +2232,11 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data) consumer_to_use = consumerd32_bin; } else { DBG("Could not find any valid consumerd executable"); + ret = -EINVAL; break; } DBG("Using kernel consumer at: %s", consumer_to_use); - execl(consumer_to_use, + ret = execl(consumer_to_use, "lttng-consumerd", verbosity, "-k", "--consumerd-cmd-sock", consumer_data->cmd_unix_sock_path, "--consumerd-err-sock", consumer_data->err_unix_sock_path, @@ -2229,9 +2284,6 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data) if (consumerd64_libdir[0] != '\0') { free(tmpnew); } - if (ret) { - goto error; - } break; } case LTTNG_CONSUMER32_UST: @@ -2275,9 +2327,6 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data) if (consumerd32_libdir[0] != '\0') { free(tmpnew); } - if (ret) { - goto error; - } break; } default: @@ -2285,8 +2334,9 @@ static pid_t spawn_consumerd(struct consumer_data *consumer_data) exit(EXIT_FAILURE); } if (errno != 0) { - PERROR("kernel start consumer exec"); + PERROR("Consumer execl()"); } + /* Reaching this point, we got a failure on our execl(). */ exit(EXIT_FAILURE); } else if (pid > 0) { ret = pid; @@ -2844,6 +2894,7 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock, } /* 32-bit */ + pthread_mutex_lock(&ustconsumer32_data.pid_mutex); if (consumerd32_bin[0] != '\0' && ustconsumer32_data.pid == 0 && cmd_ctx->lsm->cmd_type != LTTNG_REGISTER_CONSUMER) { @@ -3048,7 +3099,9 @@ skip_domain: struct lttng_event *events; ssize_t nb_events; + session_lock_list(); nb_events = cmd_list_tracepoints(cmd_ctx->lsm->domain.type, &events); + session_unlock_list(); if (nb_events < 0) { /* Return value is a negative lttng_error_code. */ ret = -nb_events; @@ -3079,8 +3132,10 @@ skip_domain: struct lttng_event_field *fields; ssize_t nb_fields; + session_lock_list(); nb_fields = cmd_list_tracepoint_fields(cmd_ctx->lsm->domain.type, &fields); + session_unlock_list(); if (nb_fields < 0) { /* Return value is a negative lttng_error_code. */ ret = -nb_fields; @@ -3663,7 +3718,7 @@ restart: rcu_thread_online(); - reply.ret_code = 0; + memset(&reply, 0, sizeof(reply)); for (i = 0; i < NR_HEALTH_SESSIOND_TYPES; i++) { /* * health_check_state returns 0 if health is @@ -3973,6 +4028,7 @@ static void usage(void) fprintf(stderr, " --consumerd64-path PATH Specify path for the 64-bit UST consumer daemon binary\n"); fprintf(stderr, " --consumerd64-libdir PATH Specify path for the 64-bit UST consumer daemon libraries\n"); fprintf(stderr, " -d, --daemonize Start as a daemon.\n"); + fprintf(stderr, " -b, --background Start as a daemon, keeping console open.\n"); fprintf(stderr, " -g, --group NAME Specify the tracing group name. (default: tracing)\n"); fprintf(stderr, " -V, --version Show version number.\n"); fprintf(stderr, " -S, --sig-parent Send SIGUSR1 to parent pid to notify readiness.\n"); @@ -4015,12 +4071,13 @@ static int parse_args(int argc, char **argv) { "no-kernel", 0, 0, 'N' }, { "pidfile", 1, 0, 'p' }, { "jul-tcp-port", 1, 0, 'J' }, + { "background", 0, 0, 'b' }, { NULL, 0, 0, 0 } }; while (1) { int option_index = 0; - c = getopt_long(argc, argv, "dhqvVSN" "a:c:g:s:C:E:D:F:Z:u:t:p:J:", + c = getopt_long(argc, argv, "dhqvVSN" "a:c:g:s:C:E:D:F:Z:u:t:p:J:b", long_options, &option_index); if (c == -1) { break; @@ -4042,6 +4099,9 @@ static int parse_args(int argc, char **argv) case 'd': opt_daemon = 1; break; + case 'b': + opt_background = 1; + break; case 'g': tracing_group_name = optarg; break; @@ -4498,6 +4558,24 @@ error: return; } +/* + * Create lockfile using the rundir and return its fd. + */ +static int create_lockfile(void) +{ + int ret; + char lockfile_path[PATH_MAX]; + + ret = generate_lock_file_path(lockfile_path, sizeof(lockfile_path)); + if (ret < 0) { + goto error; + } + + ret = utils_create_lock_file(lockfile_path); +error: + return ret; +} + /* * Write JUL TCP port using the rundir. */ @@ -4526,107 +4604,6 @@ error: return; } -/* - * Daemonize this process by forking and making the parent wait for the child - * to signal it indicating readiness. Once received, the parent successfully - * quits. - * - * The child process undergoes the same action that daemon(3) does meaning - * setsid, chdir, and dup /dev/null into 0, 1 and 2. - * - * Return 0 on success else -1 on error. - */ -static int daemonize(void) -{ - int ret; - pid_t pid; - - /* Get parent pid of this process. */ - child_ppid = getppid(); - - pid = fork(); - if (pid < 0) { - PERROR("fork"); - goto error; - } else if (pid == 0) { - int fd; - pid_t sid; - - /* Child */ - - /* - * Get the newly created parent pid so we can signal that process when - * we are ready to operate. - */ - child_ppid = getppid(); - - sid = setsid(); - if (sid < 0) { - PERROR("setsid"); - goto error; - } - - /* Try to change directory to /. If we can't well at least notify. */ - ret = chdir("/"); - if (ret < 0) { - PERROR("chdir"); - } - - fd = open(_PATH_DEVNULL, O_RDWR, 0); - if (fd < 0) { - PERROR("open %s", _PATH_DEVNULL); - /* Let 0, 1 and 2 open since we can't bind them to /dev/null. */ - } else { - (void) dup2(fd, STDIN_FILENO); - (void) dup2(fd, STDOUT_FILENO); - (void) dup2(fd, STDERR_FILENO); - if (fd > 2) { - ret = close(fd); - if (ret < 0) { - PERROR("close"); - } - } - } - goto end; - } else { - /* Parent */ - - /* - * Waiting for child to notify this parent that it can exit. Note that - * sleep() is interrupted before the 1 second delay as soon as the - * signal is received, so it will not cause visible delay for the - * user. - */ - while (!CMM_LOAD_SHARED(recv_child_signal)) { - int status; - pid_t ret; - - /* - * Check if child exists without blocking. If so, we have to stop - * this parent process and return an error. - */ - ret = waitpid(pid, &status, WNOHANG); - if (ret < 0 || (ret != 0 && WIFEXITED(status))) { - /* The child exited somehow or was not valid. */ - goto error; - } - sleep(1); - } - - /* - * From this point on, the parent can exit and the child is now an - * operationnal session daemon ready to serve clients and applications. - */ - exit(EXIT_SUCCESS); - } - -end: - return 0; - -error: - return -1; -} - /* * main */ @@ -4660,10 +4637,11 @@ int main(int argc, char **argv) } /* Daemonize */ - if (opt_daemon) { + if (opt_daemon || opt_background) { int i; - ret = daemonize(); + ret = lttng_daemonize(&child_ppid, &recv_child_signal, + !opt_background); if (ret < 0) { goto error; } @@ -4774,6 +4752,11 @@ int main(int argc, char **argv) } } + lockfile_fd = create_lockfile(); + if (lockfile_fd < 0) { + goto error; + } + /* Set consumer initial state */ kernel_consumerd_state = CONSUMER_STOPPED; ust_consumerd_state = CONSUMER_STOPPED; @@ -4997,7 +4980,7 @@ int main(int argc, char **argv) ret = pthread_create(&apps_notify_thread, NULL, ust_thread_manage_notify, (void *) NULL); if (ret != 0) { - PERROR("pthread_create apps"); + PERROR("pthread_create notify"); goto exit_apps_notify; } @@ -5005,7 +4988,7 @@ int main(int argc, char **argv) ret = pthread_create(&jul_reg_thread, NULL, jul_thread_manage_registration, (void *) NULL); if (ret != 0) { - PERROR("pthread_create apps"); + PERROR("pthread_create JUL"); goto exit_jul_reg; }