Fix: run-as thread deadlocks on itself in the restart error path
[lttng-tools.git] / src / common / runas.c
index c81652e9f4b4f3a09ca1747c0b030aa9d1e13c42..adbbe3e97a8446b514f5ac7ec308f0d217730115 100644 (file)
@@ -240,6 +240,7 @@ int _rmdir_recursive(struct run_as_data *data, struct run_as_ret *ret_value)
        return ret_value->u.rmdir_recursive.ret;
 }
 
+#ifdef HAVE_ELF_H
 static
 int _extract_elf_symbol_offset(struct run_as_data *data,
                struct run_as_ret *ret_value)
@@ -298,6 +299,23 @@ free_offset:
 end:
        return ret;
 }
+#else
+static
+int _extract_elf_symbol_offset(struct run_as_data *data,
+               struct run_as_ret *ret_value)
+{
+       ERR("Unimplemented runas command RUN_AS_EXTRACT_ELF_SYMBOL_OFFSET");
+       return -1;
+}
+
+static
+int _extract_sdt_probe_offsets(struct run_as_data *data,
+               struct run_as_ret *ret_value)
+{
+       ERR("Unimplemented runas command RUN_AS_EXTRACT_SDT_PROBE_OFFSETS");
+       return -1;
+}
+#endif
 
 static
 run_as_fct run_as_enum_to_fct(enum run_as_cmd cmd)
@@ -852,7 +870,9 @@ end:
 }
 
 static
-int run_as_create_worker_no_lock(const char *procname)
+int run_as_create_worker_no_lock(const char *procname,
+               post_fork_cleanup_cb clean_up_func,
+               void *clean_up_user_data)
 {
        pid_t pid;
        int i, ret = 0;
@@ -877,7 +897,7 @@ int run_as_create_worker_no_lock(const char *procname)
        worker->procname = strdup(procname);
        if (!worker->procname) {
                ret = -ENOMEM;
-               goto end;
+               goto error_procname_alloc;
        }
        /* Create unix socket. */
        if (lttcomm_create_anon_unix_socketpair(worker->sockpair) < 0) {
@@ -897,6 +917,12 @@ int run_as_create_worker_no_lock(const char *procname)
                reset_sighandler();
 
                set_worker_sighandlers();
+               if (clean_up_func) {
+                       if (clean_up_func(clean_up_user_data) < 0) {
+                               ERR("Run-as post-fork clean-up failed, exiting.");
+                               exit(EXIT_FAILURE);
+                       }
+               }
 
                /* Just close, no shutdown. */
                if (close(worker->sockpair[0])) {
@@ -921,6 +947,8 @@ int run_as_create_worker_no_lock(const char *procname)
                        ret = -1;
                }
                worker->sockpair[1] = -1;
+               free(worker->procname);
+               free(worker);
                LOG(ret ? PRINT_ERR : PRINT_DBG, "run_as worker exiting (ret = %d)", ret);
                exit(ret ? EXIT_FAILURE : EXIT_SUCCESS);
        } else {
@@ -960,10 +988,57 @@ error_fork:
                worker->sockpair[i] = -1;
        }
 error_sock:
+       free(worker->procname);
+error_procname_alloc:
        free(worker);
        return ret;
 }
 
+static
+void run_as_destroy_worker_no_lock(void)
+{
+       struct run_as_worker *worker = global_worker;
+
+       DBG("Destroying run_as worker");
+       if (!worker) {
+               return;
+       }
+       /* Close unix socket */
+       DBG("Closing run_as worker socket");
+       if (lttcomm_close_unix_sock(worker->sockpair[0])) {
+               PERROR("close");
+       }
+       worker->sockpair[0] = -1;
+       /* Wait for worker. */
+       for (;;) {
+               int status;
+               pid_t wait_ret;
+
+               wait_ret = waitpid(worker->pid, &status, 0);
+               if (wait_ret < 0) {
+                       if (errno == EINTR) {
+                               continue;
+                       }
+                       PERROR("waitpid");
+                       break;
+               }
+
+               if (WIFEXITED(status)) {
+                       LOG(WEXITSTATUS(status) == 0 ? PRINT_DBG : PRINT_ERR,
+                                       DEFAULT_RUN_AS_WORKER_NAME " terminated with status code %d",
+                                       WEXITSTATUS(status));
+                       break;
+               } else if (WIFSIGNALED(status)) {
+                       ERR(DEFAULT_RUN_AS_WORKER_NAME " was killed by signal %d",
+                                       WTERMSIG(status));
+                       break;
+               }
+       }
+       free(worker->procname);
+       free(worker);
+       global_worker = NULL;
+}
+
 static
 int run_as_restart_worker(struct run_as_worker *worker)
 {
@@ -973,10 +1048,10 @@ int run_as_restart_worker(struct run_as_worker *worker)
        procname = worker->procname;
 
        /* Close socket to run_as worker process and clean up the zombie process */
-       run_as_destroy_worker();
+       run_as_destroy_worker_no_lock();
 
        /* Create a new run_as worker process*/
-       ret = run_as_create_worker_no_lock(procname);
+       ret = run_as_create_worker_no_lock(procname, NULL, NULL);
        if (ret < 0 ) {
                ERR("Restarting the worker process failed");
                ret = -1;
@@ -1192,12 +1267,15 @@ int run_as_extract_sdt_probe_offsets(int fd, const char* provider_name,
 }
 
 LTTNG_HIDDEN
-int run_as_create_worker(const char *procname)
+int run_as_create_worker(const char *procname,
+               post_fork_cleanup_cb clean_up_func,
+               void *clean_up_user_data)
 {
        int ret;
 
        pthread_mutex_lock(&worker_lock);
-       ret = run_as_create_worker_no_lock(procname);
+       ret = run_as_create_worker_no_lock(procname, clean_up_func,
+                       clean_up_user_data);
        pthread_mutex_unlock(&worker_lock);
        return ret;
 }
@@ -1205,47 +1283,7 @@ int run_as_create_worker(const char *procname)
 LTTNG_HIDDEN
 void run_as_destroy_worker(void)
 {
-       struct run_as_worker *worker = global_worker;
-
-       DBG("Destroying run_as worker");
        pthread_mutex_lock(&worker_lock);
-       if (!worker) {
-               goto end;
-       }
-       /* Close unix socket */
-       DBG("Closing run_as worker socket");
-       if (lttcomm_close_unix_sock(worker->sockpair[0])) {
-               PERROR("close");
-       }
-       worker->sockpair[0] = -1;
-       /* Wait for worker. */
-       for (;;) {
-               int status;
-               pid_t wait_ret;
-
-               wait_ret = waitpid(worker->pid, &status, 0);
-               if (wait_ret < 0) {
-                       if (errno == EINTR) {
-                               continue;
-                       }
-                       PERROR("waitpid");
-                       break;
-               }
-
-               if (WIFEXITED(status)) {
-                       LOG(WEXITSTATUS(status) == 0 ? PRINT_DBG : PRINT_ERR,
-                                       DEFAULT_RUN_AS_WORKER_NAME " terminated with status code %d",
-                                       WEXITSTATUS(status));
-                       break;
-               } else if (WIFSIGNALED(status)) {
-                       ERR(DEFAULT_RUN_AS_WORKER_NAME " was killed by signal %d",
-                                       WTERMSIG(status));
-                       break;
-               }
-       }
-       free(worker->procname);
-       free(worker);
-       global_worker = NULL;
-end:
+       run_as_destroy_worker_no_lock();
        pthread_mutex_unlock(&worker_lock);
 }
This page took 0.026163 seconds and 4 git commands to generate.