From: David Goulet Date: Tue, 4 Oct 2011 21:15:32 +0000 (-0400) Subject: Add sem_wait timeout on the kconsumerd thread X-Git-Tag: v2.0-pre15~196 X-Git-Url: https://git.lttng.org/?p=lttng-tools.git;a=commitdiff_plain;h=ee0b00610a6b7beaf6af8feddd7ef6751f22debf Add sem_wait timeout on the kconsumerd thread It's unlikely to happen but if the kconsumerd was not able to send us the sock ready command, at least the session daemon will not stall forever. A 30-second timeout is added and the error is handled after that. Also adds the handling of the sem_init ret value and adds mutex lock around the kconsumerd pid in the same function. There was also a problem on error code handling where LTTCOMM error was returned by spawn kconsumerd function but handled as if it might be negative at the end of the call chain. This prevented the right error code from being returned to the client when the kconsumerd failed. Signed-off-by: David Goulet --- diff --git a/include/lttng-share.h b/include/lttng-share.h index 78abff6ae..399de4f6e 100644 --- a/include/lttng-share.h +++ b/include/lttng-share.h @@ -55,6 +55,13 @@ /* See lttng-ust.h enum lttng_ust_output */ #define DEFAULT_UST_CHANNEL_OUTPUT LTTNG_UST_MMAP +/* + * Default timeout value for the sem_timedwait() call. Blocking forever is not + * wanted so a timeout is used to control the data flow and not freeze the + * session daemon. + */ +#define DEFAULT_SEM_WAIT_TIMEOUT 30 /* in seconds */ + /* * Takes a pointer x and transform it so we can use it to access members * without a function call. 
Here an example: diff --git a/ltt-sessiond/main.c b/ltt-sessiond/main.c index d9c9d7b9a..32e47068e 100644 --- a/ltt-sessiond/main.c +++ b/ltt-sessiond/main.c @@ -842,6 +842,8 @@ static void *thread_manage_kconsumerd(void *data) goto error; } + DBG2("Receiving code from kconsumerd_err_sock"); + /* Getting status code from kconsumerd */ ret = lttcomm_recv_unix_sock(sock, &code, sizeof(enum lttcomm_return_code)); @@ -861,7 +863,7 @@ static void *thread_manage_kconsumerd(void *data) sem_post(&kconsumerd_sem); DBG("Kconsumerd command socket ready"); } else { - DBG("Kconsumerd error when waiting for SOCK_READY : %s", + ERR("Kconsumerd error when waiting for SOCK_READY : %s", lttcomm_get_readable_code(-code)); goto error; } @@ -1263,29 +1265,66 @@ error: static int spawn_kconsumerd_thread(void) { int ret; + struct timespec timeout; + + timeout.tv_sec = DEFAULT_SEM_WAIT_TIMEOUT; + timeout.tv_nsec = 0; /* Setup semaphore */ - sem_init(&kconsumerd_sem, 0, 0); + ret = sem_init(&kconsumerd_sem, 0, 0); + if (ret < 0) { + PERROR("sem_init kconsumerd_sem"); + goto error; + } ret = pthread_create(&kconsumerd_thread, NULL, thread_manage_kconsumerd, (void *) NULL); if (ret != 0) { - perror("pthread_create kconsumerd"); + PERROR("pthread_create kconsumerd"); + ret = -1; goto error; } - /* Wait for the kconsumerd thread to be ready */ - sem_wait(&kconsumerd_sem); + /* Get time for sem_timedwait absolute timeout */ + ret = clock_gettime(CLOCK_REALTIME, &timeout); + if (ret < 0) { + PERROR("clock_gettime spawn kconsumerd"); + /* Infinite wait for the kconsumerd thread to be ready */ + ret = sem_wait(&kconsumerd_sem); + } else { + /* Normal timeout if the gettime was successful */ + timeout.tv_sec += DEFAULT_SEM_WAIT_TIMEOUT; + ret = sem_timedwait(&kconsumerd_sem, &timeout); + } + if (ret < 0) { + if (errno == ETIMEDOUT) { + /* + * Call has timed out so we kill the kconsumerd_thread and return + * an error. + */ + ERR("The kconsumerd thread was never ready. 
Killing it"); + ret = pthread_cancel(kconsumerd_thread); + if (ret < 0) { + PERROR("pthread_cancel kconsumerd_thread"); + } + } else { + PERROR("semaphore wait failed kconsumerd thread"); + } + goto error; + } + + pthread_mutex_lock(&kconsumerd_pid_mutex); if (kconsumerd_pid == 0) { ERR("Kconsumerd did not start"); + pthread_mutex_unlock(&kconsumerd_pid_mutex); goto error; } + pthread_mutex_unlock(&kconsumerd_pid_mutex); return 0; error: - ret = LTTCOMM_KERN_CONSUMER_FAIL; return ret; } @@ -1367,18 +1406,16 @@ static int start_kconsumerd(void) ret = spawn_kconsumerd(); if (ret < 0) { ERR("Spawning kconsumerd failed"); - ret = LTTCOMM_KERN_CONSUMER_FAIL; pthread_mutex_unlock(&kconsumerd_pid_mutex); goto error; } /* Setting up the global kconsumerd_pid */ kconsumerd_pid = ret; + DBG2("Kconsumerd pid %d", kconsumerd_pid); pthread_mutex_unlock(&kconsumerd_pid_mutex); - DBG("Kconsumerd pid %d", ret); - - DBG("Spawning kconsumerd thread"); + DBG2("Spawning kconsumerd thread"); ret = spawn_kconsumerd_thread(); if (ret < 0) { ERR("Fatal error spawning kconsumerd thread");