X-Git-Url: https://git.lttng.org/?a=blobdiff_plain;f=src%2Fbin%2Flttng-relayd%2Fmain.c;h=33aad88ee3e5d0d274790dd4edef675ffeeb06db;hb=51a9e1c7f7fd48e2b53e258aee269a69cb8b59d3;hp=bf7be3e25c77b299e139ce8c46d49e6b8b9bb667;hpb=55706a7d35e938a784ca0ccbdb3bb902070d9394;p=lttng-tools.git diff --git a/src/bin/lttng-relayd/main.c b/src/bin/lttng-relayd/main.c index bf7be3e25..33aad88ee 100644 --- a/src/bin/lttng-relayd/main.c +++ b/src/bin/lttng-relayd/main.c @@ -68,6 +68,8 @@ static struct lttng_uri *live_uri; const char *progname; +const char *tracing_group_name = DEFAULT_TRACING_GROUP; + /* * Quit pipe for all threads. This permits a single cancellation point * for all threads when receiving an event on the pipe. @@ -86,6 +88,7 @@ static int dispatch_thread_exit; static pthread_t listener_thread; static pthread_t dispatcher_thread; static pthread_t worker_thread; +static pthread_t health_thread; static uint64_t last_relay_stream_id; static uint64_t last_relay_session_id; @@ -116,7 +119,7 @@ struct lttng_ht *viewer_streams_ht; struct lttng_ht *indexes_ht; /* Relayd health monitoring */ -static struct health_app *health_relayd; +struct health_app *health_relayd; /* * usage function on stderr @@ -131,6 +134,7 @@ void usage(void) fprintf(stderr, " -D, --data-port URL Data port listening.\n"); fprintf(stderr, " -o, --output PATH Output path for traces. Must use an absolute path.\n"); fprintf(stderr, " -v, --verbose Verbose mode. Activate DBG() macro.\n"); + fprintf(stderr, " -g, --group NAME Specify the tracing group name. (default: tracing)\n"); } static @@ -144,6 +148,7 @@ int parse_args(int argc, char **argv) { "control-port", 1, 0, 'C', }, { "data-port", 1, 0, 'D', }, { "daemonize", 0, 0, 'd', }, + { "group", 1, 0, 'g', }, { "help", 0, 0, 'h', }, { "output", 1, 0, 'o', }, { "verbose", 0, 0, 'v', }, @@ -152,7 +157,7 @@ int parse_args(int argc, char **argv) while (1) { int option_index = 0; - c = getopt_long(argc, argv, "dhv" "C:D:o:", + c = getopt_long(argc, argv, "dhv" "C:D:o:g:", long_options, &option_index); if (c == -1) { break; @@ -188,6 +193,9 @@ int parse_args(int argc, char **argv) case 'd': opt_daemon = 1; break; + case 'g': + tracing_group_name = optarg; + break; case 'h': usage(); exit(EXIT_FAILURE); @@ -297,6 +305,18 @@ int notify_thread_pipe(int wpipe) return ret; } +static void notify_health_quit_pipe(int *pipe) +{ + int ret; + + do { + ret = write(pipe[1], "4", 1); + } while (ret < 0 && errno == EINTR); + if (ret < 0 || ret != 1) { + PERROR("write relay health quit"); + } +} + /* * Stop all threads by closing the thread quit pipe. */ @@ -312,6 +332,8 @@ void stop_threads(void) ERR("write error on thread quit pipe"); } + notify_health_quit_pipe(health_quit_pipe); + /* Dispatch thread */ CMM_STORE_SHARED(dispatch_thread_exit, 1); futex_nto1_wake(&relay_cmd_queue.futex); @@ -519,6 +541,8 @@ void *relay_thread_listener(void *data) health_register(health_relayd, HEALTH_RELAYD_TYPE_LISTENER); + health_code_update(); + control_sock = relay_init_sock(control_uri); if (!control_sock) { goto error_sock_control; @@ -550,10 +574,14 @@ void *relay_thread_listener(void *data) } while (1) { + health_code_update(); + DBG("Listener accepting connections"); restart: + health_poll_entry(); ret = lttng_poll_wait(&events, -1); + health_poll_exit(); if (ret < 0) { /* * Restart interrupted system call. @@ -568,6 +596,8 @@ restart: DBG("Relay new connection received"); for (i = 0; i < nb_fd; i++) { + health_code_update(); + /* Fetch once the poll data */ revents = LTTNG_POLL_GETEV(&events, i); pollfd = LTTNG_POLL_GETFD(&events, i); @@ -662,7 +692,8 @@ error_sock_relay: lttcomm_destroy_sock(control_sock); error_sock_control: if (err) { - DBG("Thread exited with error"); + health_error(); + ERR("Health error occurred in %s", __func__); } health_unregister(health_relayd); DBG("Relay listener thread cleanup complete"); @@ -676,7 +707,7 @@ error_sock_control: static void *relay_thread_dispatcher(void *data) { - int ret; + int ret, err = -1; struct cds_wfq_node *node; struct relay_command *relay_cmd = NULL; @@ -684,11 +715,17 @@ void *relay_thread_dispatcher(void *data) health_register(health_relayd, HEALTH_RELAYD_TYPE_DISPATCHER); + health_code_update(); + while (!CMM_LOAD_SHARED(dispatch_thread_exit)) { + health_code_update(); + /* Atomically prepare the queue futex */ futex_nto1_prepare(&relay_cmd_queue.futex); do { + health_code_update(); + /* Dequeue commands */ node = cds_wfq_dequeue_blocking(&relay_cmd_queue.queue); if (node == NULL) { @@ -717,10 +754,19 @@ void *relay_thread_dispatcher(void *data) } while (node != NULL); /* Futex wait on queue. Blocking call on futex() */ + health_poll_entry(); futex_nto1_wait(&relay_cmd_queue.futex); + health_poll_exit(); } + /* Normal exit, no error */ + err = 0; + error: + if (err) { + health_error(); + ERR("Health error occurred in %s", __func__); + } health_unregister(health_relayd); DBG("Dispatch thread dying"); stop_threads(); @@ -2156,6 +2202,8 @@ void *relay_thread_worker(void *data) health_register(health_relayd, HEALTH_RELAYD_TYPE_WORKER); + health_code_update(); + /* table of connections indexed on socket */ relay_connections_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG); if (!relay_connections_ht) { @@ -2182,9 +2230,13 @@ restart: while (1) { int idx = -1, i, seen_control = 0, last_notdel_data_fd = -1; + health_code_update(); + /* Infinite blocking call, waiting for transmission */ DBG3("Relayd worker thread polling..."); + health_poll_entry(); ret = lttng_poll_wait(&events, -1); + health_poll_exit(); if (ret < 0) { /* * Restart interrupted system call. @@ -2207,6 +2259,8 @@ restart: uint32_t revents = LTTNG_POLL_GETEV(&events, i); int pollfd = LTTNG_POLL_GETFD(&events, i); + health_code_update(); + /* Thread quit pipe has been closed. Killing thread. */ ret = check_thread_quit_pipe(pollfd, revents); if (ret) { @@ -2309,6 +2363,9 @@ restart: if (last_seen_data_fd >= 0) { for (i = 0; i < nb_fd; i++) { int pollfd = LTTNG_POLL_GETFD(&events, i); + + health_code_update(); + if (last_seen_data_fd == pollfd) { idx = i; break; @@ -2322,6 +2379,8 @@ restart: uint32_t revents = LTTNG_POLL_GETEV(&events, i); int pollfd = LTTNG_POLL_GETFD(&events, i); + health_code_update(); + /* Skip the command pipe. It's handled in the first loop. */ if (pollfd == relay_cmd_pipe[0]) { continue; @@ -2371,6 +2430,9 @@ restart: last_seen_data_fd = -1; } + /* Normal exit, no error */ + ret = 0; + exit: error: lttng_poll_clean(&events); @@ -2378,6 +2440,8 @@ error: /* empty the hash table and free the memory */ rcu_read_lock(); cds_lfht_for_each_entry(relay_connections_ht->ht, &iter.iter, node, node) { + health_code_update(); + node = lttng_ht_iter_get_node_ulong(&iter); if (node) { relay_connection = caa_container_of(node, @@ -2397,11 +2461,15 @@ relay_connections_ht_error: if (err) { DBG("Thread exited with error"); } - health_unregister(health_relayd); DBG("Worker thread cleanup complete"); free(data_buffer); - stop_threads(); + if (err) { + health_error(); + ERR("Health error occurred in %s", __func__); + } + health_unregister(health_relayd); rcu_unregister_thread(); + stop_threads(); return NULL; } @@ -2523,6 +2591,19 @@ int main(int argc, char **argv) goto exit_health_app_create; } + ret = utils_create_pipe(health_quit_pipe); + if (ret < 0) { + goto error_health_pipe; + } + + /* Create thread to manage the client socket */ + ret = pthread_create(&health_thread, NULL, + thread_manage_health, (void *) NULL); + if (ret != 0) { + PERROR("pthread_create health"); + goto health_error; + } + /* Setup the dispatcher thread */ ret = pthread_create(&dispatcher_thread, NULL, relay_thread_dispatcher, (void *) NULL); @@ -2553,8 +2634,6 @@ int main(int argc, char **argv) goto exit_live; } - live_stop_threads(); - exit_live: ret = pthread_join(listener_thread, &status); if (ret != 0) { @@ -2577,6 +2656,21 @@ exit_worker: } exit_dispatcher: + ret = pthread_join(health_thread, &status); + if (ret != 0) { + PERROR("pthread_join health thread"); + goto error; /* join error, exit without cleanup */ + } + + /* + * Stop live threads only after joining other threads. + */ + live_stop_threads(); + +health_error: + utils_close_pipe(health_quit_pipe); + +error_health_pipe: health_app_destroy(health_relayd); exit_health_app_create: