X-Git-Url: https://git.lttng.org/?p=lttng-tools.git;a=blobdiff_plain;f=src%2Fbin%2Flttng-sessiond%2Fmain.c;h=0e20b4f67886dc656e88e5631cce8fc223379205;hp=730ac656ba199153fd663d825ea4058803b396ea;hb=1400a85d14101d8ed140f44011a183f6b5b80d29;hpb=d93c4f1ffcffa73102e3299276f2f83951a68c36 diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c index 730ac656b..0e20b4f67 100644 --- a/src/bin/lttng-sessiond/main.c +++ b/src/bin/lttng-sessiond/main.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -62,6 +61,7 @@ #include "fd-limit.h" #include "filter.h" #include "health.h" +#include "testpoint.h" #define CONSUMERD_FILE "lttng-consumerd" @@ -81,7 +81,10 @@ static int is_root; /* Set to 1 if the daemon is running as root */ static pid_t ppid; /* Parent PID for --sig-parent option */ static char *rundir; -/* Consumer daemon specific control data */ +/* + * Consumer daemon specific control data. Every value not initialized here is + * set to 0 by the static definition. 
+ */ static struct consumer_data kconsumer_data = { .type = LTTNG_CONSUMER_KERNEL, .err_unix_sock_path = DEFAULT_KCONSUMERD_ERR_SOCK_PATH, @@ -90,6 +93,8 @@ static struct consumer_data kconsumer_data = { .cmd_sock = -1, .pid_mutex = PTHREAD_MUTEX_INITIALIZER, .lock = PTHREAD_MUTEX_INITIALIZER, + .cond = PTHREAD_COND_INITIALIZER, + .cond_mutex = PTHREAD_MUTEX_INITIALIZER, }; static struct consumer_data ustconsumer64_data = { .type = LTTNG_CONSUMER64_UST, @@ -99,6 +104,8 @@ static struct consumer_data ustconsumer64_data = { .cmd_sock = -1, .pid_mutex = PTHREAD_MUTEX_INITIALIZER, .lock = PTHREAD_MUTEX_INITIALIZER, + .cond = PTHREAD_COND_INITIALIZER, + .cond_mutex = PTHREAD_MUTEX_INITIALIZER, }; static struct consumer_data ustconsumer32_data = { .type = LTTNG_CONSUMER32_UST, @@ -108,6 +115,8 @@ static struct consumer_data ustconsumer32_data = { .cmd_sock = -1, .pid_mutex = PTHREAD_MUTEX_INITIALIZER, .lock = PTHREAD_MUTEX_INITIALIZER, + .cond = PTHREAD_COND_INITIALIZER, + .cond_mutex = PTHREAD_MUTEX_INITIALIZER, }; /* Shared between threads */ @@ -680,8 +689,12 @@ static void *thread_manage_kernel(void *data) DBG("Thread manage kernel started"); + testpoint(thread_manage_kernel); + health_code_update(&health_thread_kernel); + testpoint(thread_manage_kernel_before_loop); + ret = create_thread_poll_set(&events, 2); if (ret < 0) { goto error_poll_create; @@ -788,6 +801,29 @@ error_poll_create: return NULL; } +/* + * Signal the pthread condition of the consumer data with the thread state. + */ +static void signal_consumer_condition(struct consumer_data *data, int state) +{ + pthread_mutex_lock(&data->cond_mutex); + + /* + * The state is set before signaling. It can be any value, it's the waiter + * job to correctly interpret this condition variable associated to the + * consumer pthread_cond. + * + * A value of 0 means that the corresponding thread of the consumer data + * was not started. 1 indicates that the thread has started and is ready + * for action.
A negative value means that there was an error during the + * thread bootstrap. + */ + data->consumer_thread_is_ready = state; + (void) pthread_cond_signal(&data->cond); + + pthread_mutex_unlock(&data->cond_mutex); +} + /* * This thread manage the consumer error sent back to the session daemon. */ @@ -829,6 +865,9 @@ static void *thread_manage_consumer(void *data) /* Inifinite blocking call, waiting for transmission */ restart: health_poll_update(&consumer_data->health); + + testpoint(thread_manage_consumer); + ret = lttng_poll_wait(&events, -1); health_poll_update(&consumer_data->health); if (ret < 0) { @@ -886,13 +925,13 @@ restart: consumer_data->cmd_sock = lttcomm_connect_unix_sock(consumer_data->cmd_unix_sock_path); if (consumer_data->cmd_sock < 0) { - sem_post(&consumer_data->sem); + /* On error, signal condition and quit. */ + signal_consumer_condition(consumer_data, -1); PERROR("consumer connect"); goto error; } - /* Signal condition to tell that the kconsumerd is ready */ - sem_post(&consumer_data->sem); - DBG("consumer command socket ready"); + signal_consumer_condition(consumer_data, 1); + DBG("Consumer command socket ready"); } else { ERR("consumer error when waiting for SOCK_READY : %s", lttcomm_get_readable_code(-code)); @@ -1026,6 +1065,8 @@ static void *thread_manage_apps(void *data) DBG("[thread] Manage application started"); + testpoint(thread_manage_apps); + rcu_register_thread(); rcu_thread_online(); @@ -1041,6 +1082,8 @@ static void *thread_manage_apps(void *data) goto error; } + testpoint(thread_manage_apps_before_loop); + health_code_update(&health_thread_app_manage); while (1) { @@ -1264,6 +1307,8 @@ static void *thread_registration_apps(void *data) DBG("[thread] Manage application registration started"); + testpoint(thread_registration_apps); + ret = lttcomm_listen_unix_sock(apps_sock); if (ret < 0) { goto error_listen; @@ -1446,59 +1491,110 @@ error_create_poll: */ static int spawn_consumer_thread(struct consumer_data *consumer_data) { - 
int ret; + int ret, clock_ret; struct timespec timeout; - timeout.tv_sec = DEFAULT_SEM_WAIT_TIMEOUT; - timeout.tv_nsec = 0; + /* Make sure we set the readiness flag to 0 because we are NOT ready */ + consumer_data->consumer_thread_is_ready = 0; - /* Setup semaphore */ - ret = sem_init(&consumer_data->sem, 0, 0); - if (ret < 0) { - PERROR("sem_init consumer semaphore"); + /* Setup pthread condition */ + ret = pthread_condattr_init(&consumer_data->condattr); + if (ret != 0) { + errno = ret; + PERROR("pthread_condattr_init consumer data"); goto error; } - ret = pthread_create(&consumer_data->thread, NULL, - thread_manage_consumer, consumer_data); + /* + * Set the monotonic clock in order to make sure we DO NOT jump in time + * between the clock_gettime() call and the timedwait call. See bug #324 + * for more details and how we noticed it. + */ + ret = pthread_condattr_setclock(&consumer_data->condattr, CLOCK_MONOTONIC); + if (ret != 0) { + errno = ret; + PERROR("pthread_condattr_setclock consumer data"); + goto error; + } + + ret = pthread_cond_init(&consumer_data->cond, &consumer_data->condattr); + if (ret != 0) { + errno = ret; + PERROR("pthread_cond_init consumer data"); + goto error; + } + + ret = pthread_create(&consumer_data->thread, NULL, thread_manage_consumer, + consumer_data); if (ret != 0) { PERROR("pthread_create consumer"); ret = -1; goto error; } + /* We are about to wait on a pthread condition */ + pthread_mutex_lock(&consumer_data->cond_mutex); + /* Get time for sem_timedwait absolute timeout */ - ret = clock_gettime(CLOCK_REALTIME, &timeout); - if (ret < 0) { - PERROR("clock_gettime spawn consumer"); - /* Infinite wait for the kconsumerd thread to be ready */ - ret = sem_wait(&consumer_data->sem); - } else { - /* Normal timeout if the gettime was successful */ - timeout.tv_sec += DEFAULT_SEM_WAIT_TIMEOUT; - ret = sem_timedwait(&consumer_data->sem, &timeout); + clock_ret = clock_gettime(CLOCK_MONOTONIC, &timeout); + /* + * Set the timeout for the
condition timed wait even if the clock gettime + * call fails since we might loop on that call and we want to avoid + * incrementing the timeout too many times. + */ + timeout.tv_sec += DEFAULT_SEM_WAIT_TIMEOUT; + + /* + * The following loop COULD be skipped in some conditions so this is why we + * set ret to 0 in order to make sure at least one round of the loop is + * done. + */ + ret = 0; + + /* + * Loop until the condition is reached or when a timeout is reached. Note + * that the pthread_cond_timedwait(P) man page specifies that EINTR can NOT + * be returned but the pthread_cond(3), from the glibc-doc, says that it is + * possible. This loop does not take any chances and works with both of + * them. + */ + while (!consumer_data->consumer_thread_is_ready && ret != ETIMEDOUT) { + if (clock_ret < 0) { + PERROR("clock_gettime spawn consumer"); + /* Infinite wait for the consumerd thread to be ready */ + ret = pthread_cond_wait(&consumer_data->cond, + &consumer_data->cond_mutex); + } else { + ret = pthread_cond_timedwait(&consumer_data->cond, + &consumer_data->cond_mutex, &timeout); + } } - if (ret < 0) { - if (errno == ETIMEDOUT) { + /* Release the pthread condition mutex */ + pthread_mutex_unlock(&consumer_data->cond_mutex); + + if (ret != 0) { + errno = ret; + if (ret == ETIMEDOUT) { /* * Call has timed out so we kill the kconsumerd_thread and return * an error. */ - ERR("The consumer thread was never ready. Killing it"); + ERR("Condition timed out. The consumer thread was never ready."
+ " Killing it"); ret = pthread_cancel(consumer_data->thread); if (ret < 0) { PERROR("pthread_cancel consumer thread"); } } else { - PERROR("semaphore wait failed consumer thread"); + PERROR("pthread_cond_wait failed consumer thread"); } goto error; } pthread_mutex_lock(&consumer_data->pid_mutex); if (consumer_data->pid == 0) { - ERR("Kconsumerd did not start"); + ERR("Consumerd did not start"); pthread_mutex_unlock(&consumer_data->pid_mutex); goto error; } @@ -1830,6 +1926,15 @@ static int copy_session_consumer(int domain, struct ltt_session *session) switch (domain) { case LTTNG_DOMAIN_KERNEL: DBG3("Copying tracing session consumer output in kernel session"); + /* + * XXX: We should audit the session creation and what this function + * does "extra" in order to avoid a destroy since this function is used + * in the domain session creation (kernel and ust) only. Same for UST + * domain. + */ + if (session->kernel_session->consumer) { + consumer_destroy_output(session->kernel_session->consumer); + } session->kernel_session->consumer = consumer_copy_output(session->consumer); /* Ease our life a bit for the next part */ @@ -1838,6 +1943,9 @@ static int copy_session_consumer(int domain, struct ltt_session *session) break; case LTTNG_DOMAIN_UST: DBG3("Copying tracing session consumer output in UST session"); + if (session->ust_session->consumer) { + consumer_destroy_output(session->ust_session->consumer); + } session->ust_session->consumer = consumer_copy_output(session->consumer); /* Ease our life a bit for the next part */ @@ -2006,6 +2114,7 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock, case LTTNG_LIST_DOMAINS: case LTTNG_START_TRACE: case LTTNG_STOP_TRACE: + case LTTNG_DATA_AVAILABLE: need_domain = 0; break; default: @@ -2444,12 +2553,14 @@ skip_domain: DBG("No URIs received from client... 
continuing"); *sock_error = 1; ret = LTTNG_ERR_SESSION_FAIL; + free(uris); goto error; } ret = cmd_set_consumer_uri(cmd_ctx->lsm->domain.type, cmd_ctx->session, nb_uri, uris); if (ret != LTTNG_OK) { + free(uris); goto error; } @@ -2470,6 +2581,8 @@ skip_domain: } } + free(uris); + break; } case LTTNG_START_TRACE: @@ -2504,12 +2617,14 @@ skip_domain: DBG("No URIs received from client... continuing"); *sock_error = 1; ret = LTTNG_ERR_SESSION_FAIL; + free(uris); goto error; } if (nb_uri == 1 && uris[0].dtype != LTTNG_DST_PATH) { DBG("Creating session with ONE network URI is a bad call"); ret = LTTNG_ERR_SESSION_FAIL; + free(uris); goto error; } } @@ -2517,6 +2632,8 @@ skip_domain: ret = cmd_create_session_uri(cmd_ctx->lsm->session.name, uris, nb_uri, &cmd_ctx->creds); + free(uris); + break; } case LTTNG_DESTROY_SESSION: @@ -2692,6 +2809,11 @@ skip_domain: bytecode); break; } + case LTTNG_DATA_AVAILABLE: + { + ret = cmd_data_available(cmd_ctx->session); + break; + } default: ret = LTTNG_ERR_UND; break; @@ -2912,6 +3034,8 @@ static void *thread_manage_clients(void *data) DBG("[thread] Manage client started"); + testpoint(thread_manage_clients); + rcu_register_thread(); health_code_update(&health_thread_cmd); @@ -2943,6 +3067,8 @@ static void *thread_manage_clients(void *data) kill(ppid, SIGUSR1); } + testpoint(thread_manage_clients_before_loop); + health_code_update(&health_thread_cmd); while (1) { @@ -3859,6 +3985,9 @@ int main(int argc, char **argv) health_init(&ustconsumer64_data.health); health_poll_update(&ustconsumer64_data.health); + /* Not needed anymore. */ + free(rundir); + /* Create thread to manage the client socket */ ret = pthread_create(&health_thread, NULL, thread_manage_health, (void *) NULL);