X-Git-Url: https://git.lttng.org/?p=lttng-tools.git;a=blobdiff_plain;f=src%2Fbin%2Flttng-sessiond%2Fmain.c;h=90f11d648e537fee78cbb88506dc6e026f675909;hp=9818b39569d9ee77eb84078d15ed5f12ad2f08ad;hb=ae9e45b342636e7b42eafc15cd105bccfbbbe373;hpb=9449cc7574c27419ac4424b4653a6f4428d01b4d diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c index 9818b3956..90f11d648 100644 --- a/src/bin/lttng-sessiond/main.c +++ b/src/bin/lttng-sessiond/main.c @@ -228,6 +228,11 @@ struct health_state health_thread_app_manage; struct health_state health_thread_app_reg; struct health_state health_thread_kernel; +/* + * Socket timeout for receiving and sending in seconds. + */ +static int app_socket_timeout; + static void setup_consumerd_path(void) { @@ -857,13 +862,6 @@ static void *thread_manage_consumer(void *data) */ health_poll_update(&consumer_data->health); - health_code_update(&consumer_data->health); - - ret = lttcomm_listen_unix_sock(consumer_data->err_sock); - if (ret < 0) { - goto error_listen; - } - /* * Pass 2 as size here for the thread quit pipe and kconsumerd_err_sock. * Nothing more will be added to this poll set. @@ -873,6 +871,11 @@ static void *thread_manage_consumer(void *data) goto error_poll; } + /* + * The error socket here is already in a listening state which was done + * just before spawning this thread to avoid a race between the consumer + * daemon exec trying to connect and the listen() call. + */ ret = lttng_poll_add(&events, consumer_data->err_sock, LPOLLIN | LPOLLRDHUP); if (ret < 0) { goto error; @@ -928,6 +931,12 @@ restart: goto error; } + /* + * Set the CLOEXEC flag. Return code is useless because either way, the + * show must go on. + */ + (void) utils_set_fd_cloexec(sock); + health_code_update(&consumer_data->health); DBG2("Receiving code from consumer err_sock"); @@ -1062,7 +1071,6 @@ error: lttng_poll_clean(&events); error_poll: -error_listen: if (err) { health_error(&consumer_data->health); ERR("Health error occurred in %s", __func__); @@ -1192,16 +1200,22 @@ static void *thread_manage_apps(void *data) ust_app_unregister(ust_cmd.sock); } else { /* - * We just need here to monitor the close of the UST - * socket and poll set monitor those by default. - * Listen on POLLIN (even if we never expect any - * data) to ensure that hangup wakes us. + * We only monitor the error events of the socket. This + * thread does not handle any incoming data from UST + * (POLLIN). */ - ret = lttng_poll_add(&events, ust_cmd.sock, LPOLLIN); + ret = lttng_poll_add(&events, ust_cmd.sock, + LPOLLERR & LPOLLHUP & LPOLLRDHUP); if (ret < 0) { goto error; } + /* Set socket timeout for both receiving and ending */ + (void) lttcomm_setsockopt_rcv_timeout(ust_cmd.sock, + app_socket_timeout); + (void) lttcomm_setsockopt_snd_timeout(ust_cmd.sock, + app_socket_timeout); + DBG("Apps with sock %d added to poll set", ust_cmd.sock); } @@ -1402,6 +1416,12 @@ static void *thread_registration_apps(void *data) goto error; } + /* + * Set the CLOEXEC flag. Return code is useless because + * either way, the show must go on. + */ + (void) utils_set_fd_cloexec(sock); + /* Create UST registration command for enqueuing */ ust_cmd = zmalloc(sizeof(struct ust_command)); if (ust_cmd == NULL) { @@ -1814,7 +1834,17 @@ error: */ static int start_consumerd(struct consumer_data *consumer_data) { - int ret; + int ret, err; + + /* + * Set the listen() state on the socket since there is a possible race + * between the exec() of the consumer daemon and this call if place in the + * consumer thread. See bug #366 for more details. + */ + ret = lttcomm_listen_unix_sock(consumer_data->err_sock); + if (ret < 0) { + goto error; + } pthread_mutex_lock(&consumer_data->pid_mutex); if (consumer_data->pid != 0) { @@ -1845,6 +1875,13 @@ end: return 0; error: + /* Cleanup already created socket on error. */ + if (consumer_data->err_sock >= 0) { + err = close(consumer_data->err_sock); + if (err < 0) { + PERROR("close consumer data error socket"); + } + } return ret; } @@ -2134,7 +2171,7 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock, case LTTNG_LIST_DOMAINS: case LTTNG_START_TRACE: case LTTNG_STOP_TRACE: - case LTTNG_DATA_AVAILABLE: + case LTTNG_DATA_PENDING: need_domain = 0; break; default: @@ -2408,7 +2445,7 @@ skip_domain: ret = cmd_add_context(cmd_ctx->session, cmd_ctx->lsm->domain.type, cmd_ctx->lsm->u.context.channel_name, cmd_ctx->lsm->u.context.event_name, - &cmd_ctx->lsm->u.context.ctx); + &cmd_ctx->lsm->u.context.ctx, kernel_poll_pipe[1]); break; } case LTTNG_DISABLE_CHANNEL: @@ -2829,9 +2866,9 @@ skip_domain: bytecode); break; } - case LTTNG_DATA_AVAILABLE: + case LTTNG_DATA_PENDING: { - ret = cmd_data_available(cmd_ctx->session); + ret = cmd_data_pending(cmd_ctx->session); break; } default: @@ -2882,6 +2919,12 @@ static void *thread_manage_health(void *data) goto error; } + /* + * Set the CLOEXEC flag. Return code is useless because either way, the + * show must go on. + */ + (void) utils_set_fd_cloexec(sock); + ret = lttcomm_listen_unix_sock(sock); if (ret < 0) { goto error; @@ -2946,6 +2989,12 @@ restart: goto error; } + /* + * Set the CLOEXEC flag. Return code is useless because either way, the + * show must go on. + */ + (void) utils_set_fd_cloexec(new_sock); + DBG("Receiving data from client for health..."); ret = lttcomm_recv_unix_sock(new_sock, (void *)&msg, sizeof(msg)); if (ret <= 0) { @@ -3143,6 +3192,12 @@ static void *thread_manage_clients(void *data) goto error; } + /* + * Set the CLOEXEC flag. Return code is useless because either way, the + * show must go on. + */ + (void) utils_set_fd_cloexec(sock); + /* Set socket option for credentials retrieval */ ret = lttcomm_setsockopt_creds_unix_sock(sock); if (ret < 0) { @@ -3443,6 +3498,14 @@ static int init_daemon_socket(void) goto end; } + /* Set the cloexec flag */ + ret = utils_set_fd_cloexec(client_sock); + if (ret < 0) { + ERR("Unable to set CLOEXEC flag to the client Unix socket (fd: %d). " + "Continuing but note that the consumer daemon will have a " + "reference to this socket on exec()", client_sock); + } + /* File permission MUST be 660 */ ret = chmod(client_unix_sock_path, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); if (ret < 0) { @@ -3459,6 +3522,14 @@ static int init_daemon_socket(void) goto end; } + /* Set the cloexec flag */ + ret = utils_set_fd_cloexec(apps_sock); + if (ret < 0) { + ERR("Unable to set CLOEXEC flag to the app Unix socket (fd: %d). " + "Continuing but note that the consumer daemon will have a " + "reference to this socket on exec()", apps_sock); + } + /* File permission MUST be 666 */ ret = chmod(apps_unix_sock_path, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); @@ -3468,6 +3539,9 @@ static int init_daemon_socket(void) goto end; } + DBG3("Session daemon client socket %d and application socket %d created", + client_sock, apps_sock); + end: umask(old_umask); return ret; @@ -3727,7 +3801,7 @@ int main(int argc, char **argv) { int ret = 0; void *status; - const char *home_path; + const char *home_path, *env_app_timeout; init_kernel_workarounds(); @@ -4007,6 +4081,14 @@ int main(int argc, char **argv) health_init(&ustconsumer64_data.health); health_poll_update(&ustconsumer64_data.health); + /* Check for the application socket timeout env variable. */ + env_app_timeout = getenv(DEFAULT_APP_SOCKET_TIMEOUT_ENV); + if (env_app_timeout) { + app_socket_timeout = atoi(env_app_timeout); + } else { + app_socket_timeout = DEFAULT_APP_SOCKET_RW_TIMEOUT; + } + /* Create thread to manage the client socket */ ret = pthread_create(&health_thread, NULL, thread_manage_health, (void *) NULL);