X-Git-Url: https://git.lttng.org/?p=lttng-tools.git;a=blobdiff_plain;f=src%2Fbin%2Flttng-relayd%2Fmain.c;h=fb290bacc1a4345e12850d475fa10c720115f77f;hp=fdfe73ccc913ee8e9674f2ac73325f0c0bc22d1c;hb=f48c25b760239f20a6e82f3839e04f82d07bdeea;hpb=2a174661a1e0ab551b41ff1cae7191688525fc1f diff --git a/src/bin/lttng-relayd/main.c b/src/bin/lttng-relayd/main.c index fdfe73ccc..fb290bacc 100644 --- a/src/bin/lttng-relayd/main.c +++ b/src/bin/lttng-relayd/main.c @@ -18,6 +18,7 @@ */ #define _GNU_SOURCE +#define _LGPL_SOURCE #include #include #include @@ -44,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -65,6 +67,7 @@ #include "viewer-stream.h" #include "session.h" #include "stream.h" +#include "connection.h" /* command line options */ char *opt_output_path; @@ -100,7 +103,7 @@ int thread_quit_pipe[2] = { -1, -1 }; * This pipe is used to inform the worker thread that a command is queued and * ready to be processed. */ -static int relay_cmd_pipe[2] = { -1, -1 }; +static int relay_conn_pipe[2] = { -1, -1 }; /* Shared between threads */ static int dispatch_thread_exit; @@ -118,7 +121,7 @@ static uint64_t last_relay_stream_id; * The relay_thread_listener and relay_thread_dispatcher communicate with this * queue. */ -static struct relay_cmd_queue relay_cmd_queue; +static struct relay_conn_queue relay_conn_queue; /* buffer allocated at startup, used to store the trace data */ static char *data_buffer; @@ -231,6 +234,11 @@ int set_option(int opt, const char *arg, const char *optname) break; case 'g': tracing_group_name = strdup(arg); + if (tracing_group_name == NULL) { + ret = -errno; + PERROR("strdup"); + goto end; + } tracing_group_name_override = 1; break; case 'h': @@ -249,7 +257,10 @@ int set_option(int opt, const char *arg, const char *optname) if (arg) { lttng_opt_verbose = config_parse_value(arg); } else { - lttng_opt_verbose += 1; + /* Only 3 level of verbosity (-vvv). */ + if (lttng_opt_verbose < 3) { + lttng_opt_verbose += 1; + } } break; default: @@ -325,7 +336,7 @@ end: static int set_options(int argc, char **argv) { - int c, ret = 0, option_index = 0; + int c, ret = 0, option_index = 0, retval = 0; int orig_optopt = optopt, orig_optind = optind; char *default_address, *optstring; const char *config_path = NULL; @@ -333,7 +344,7 @@ int set_options(int argc, char **argv) optstring = utils_generate_optstring(long_options, sizeof(long_options) / sizeof(struct option)); if (!optstring) { - ret = -ENOMEM; + retval = -ENOMEM; goto exit; } @@ -342,7 +353,7 @@ int set_options(int argc, char **argv) while ((c = getopt_long(argc, argv, optstring, long_options, &option_index)) != -1) { if (c == '?') { - ret = -EINVAL; + retval = -EINVAL; goto exit; } else if (c != 'f') { continue; @@ -359,8 +370,8 @@ int set_options(int argc, char **argv) if (ret) { if (ret > 0) { ERR("Invalid configuration option at line %i", ret); - ret = -1; } + retval = -1; goto exit; } @@ -375,16 +386,19 @@ int set_options(int argc, char **argv) ret = set_option(c, optarg, long_options[option_index].name); if (ret < 0) { + retval = -1; goto exit; } } /* assign default values */ if (control_uri == NULL) { - ret = asprintf(&default_address, "tcp://0.0.0.0:%d", - DEFAULT_NETWORK_CONTROL_PORT); + ret = asprintf(&default_address, + "tcp://" DEFAULT_NETWORK_CONTROL_BIND_ADDRESS ":%d", + DEFAULT_NETWORK_CONTROL_PORT); if (ret < 0) { PERROR("asprintf default data address"); + retval = -1; goto exit; } @@ -392,14 +406,17 @@ int set_options(int argc, char **argv) free(default_address); if (ret < 0) { ERR("Invalid control URI specified"); + retval = -1; goto exit; } } if (data_uri == NULL) { - ret = asprintf(&default_address, "tcp://0.0.0.0:%d", - DEFAULT_NETWORK_DATA_PORT); + ret = asprintf(&default_address, + "tcp://" DEFAULT_NETWORK_DATA_BIND_ADDRESS ":%d", + DEFAULT_NETWORK_DATA_PORT); if (ret < 0) { PERROR("asprintf default data address"); + retval = -1; goto exit; } @@ -407,14 +424,17 @@ int set_options(int argc, char **argv) free(default_address); if (ret < 0) { ERR("Invalid data URI specified"); + retval = -1; goto exit; } } if (live_uri == NULL) { - ret = asprintf(&default_address, "tcp://0.0.0.0:%d", - DEFAULT_NETWORK_VIEWER_PORT); + ret = asprintf(&default_address, + "tcp://" DEFAULT_NETWORK_VIEWER_BIND_ADDRESS ":%d", + DEFAULT_NETWORK_VIEWER_PORT); if (ret < 0) { PERROR("asprintf default viewer control address"); + retval = -1; goto exit; } @@ -422,23 +442,32 @@ int set_options(int argc, char **argv) free(default_address); if (ret < 0) { ERR("Invalid viewer control URI specified"); + retval = -1; goto exit; } } exit: free(optstring); - return ret; + return retval; } /* * Cleanup the daemon */ static -void cleanup(void) +void relayd_cleanup(struct relay_local_data *relay_ctx) { DBG("Cleaning up"); + if (viewer_streams_ht) + lttng_ht_destroy(viewer_streams_ht); + if (relay_streams_ht) + lttng_ht_destroy(relay_streams_ht); + if (relay_ctx && relay_ctx->sessions_ht) + lttng_ht_destroy(relay_ctx->sessions_ht); + free(relay_ctx); + /* free the dynamically allocated opt_output_path */ free(opt_output_path); @@ -465,41 +494,55 @@ int notify_thread_pipe(int wpipe) ret = lttng_write(wpipe, "!", 1); if (ret < 1) { PERROR("write poll pipe"); + goto end; } - + ret = 0; +end: return ret; } -static void notify_health_quit_pipe(int *pipe) +static +int notify_health_quit_pipe(int *pipe) { ssize_t ret; ret = lttng_write(pipe[1], "4", 1); if (ret < 1) { PERROR("write relay health quit"); + goto end; } + ret = 0; +end: + return ret; } /* - * Stop all threads by closing the thread quit pipe. + * Stop all relayd and relayd-live threads. */ -static -void stop_threads(void) +int lttng_relay_stop_threads(void) { - int ret; + int retval = 0; /* Stopping all threads */ DBG("Terminating all threads"); - ret = notify_thread_pipe(thread_quit_pipe[1]); - if (ret < 0) { + if (notify_thread_pipe(thread_quit_pipe[1])) { ERR("write error on thread quit pipe"); + retval = -1; } - notify_health_quit_pipe(health_quit_pipe); + if (notify_health_quit_pipe(health_quit_pipe)) { + ERR("write error on health quit pipe"); + } /* Dispatch thread */ CMM_STORE_SHARED(dispatch_thread_exit, 1); - futex_nto1_wake(&relay_cmd_queue.futex); + futex_nto1_wake(&relay_conn_queue.futex); + + if (relayd_live_stop()) { + ERR("Error stopping live threads"); + retval = -1; + } + return retval; } /* @@ -517,11 +560,15 @@ void sighandler(int sig) return; case SIGINT: DBG("SIGINT caught"); - stop_threads(); + if (lttng_relay_stop_threads()) { + ERR("Error stopping threads"); + } break; case SIGTERM: DBG("SIGTERM caught"); - stop_threads(); + if (lttng_relay_stop_threads()) { + ERR("Error stopping threads"); + } break; case SIGUSR1: CMM_STORE_SHARED(recv_child_signal, 1); @@ -728,7 +775,7 @@ static void try_close_stream(struct relay_session *session, pthread_mutex_unlock(&session->viewer_ready_lock); ret = stream_close(session, stream); - if (!ret) { + if (ret || session->snapshot) { /* Already close thus the ctf trace is being or has been destroyed. */ goto end; } @@ -746,7 +793,6 @@ static void *relay_thread_listener(void *data) { int i, ret, pollfd, err = -1; - int val = 1; uint32_t revents, nb_fd; struct lttng_poll_event events; struct lttcomm_sock *control_sock, *data_sock; @@ -822,6 +868,11 @@ restart: revents = LTTNG_POLL_GETEV(&events, i); pollfd = LTTNG_POLL_GETFD(&events, i); + if (!revents) { + /* No activity for this FD (poll implementation). */ + continue; + } + /* Thread quit pipe has been closed. Killing thread. */ ret = check_thread_quit_pipe(pollfd, revents); if (ret) { @@ -834,58 +885,55 @@ restart: goto error; } else if (revents & LPOLLIN) { /* - * Get allocated in this thread, - * enqueued to a global queue, dequeued - * and freed in the worker thread. + * Get allocated in this thread, enqueued to a global queue, + * dequeued and freed in the worker thread. */ - struct relay_command *relay_cmd; + int val = 1; + struct relay_connection *new_conn; struct lttcomm_sock *newsock; - relay_cmd = zmalloc(sizeof(struct relay_command)); - if (relay_cmd == NULL) { - PERROR("relay command zmalloc"); + new_conn = connection_create(); + if (!new_conn) { goto error; } if (pollfd == data_sock->fd) { + new_conn->type = RELAY_DATA; newsock = data_sock->ops->accept(data_sock); - if (!newsock) { - PERROR("accepting data sock"); - free(relay_cmd); - goto error; - } - relay_cmd->type = RELAY_DATA; - DBG("Relay data connection accepted, socket %d", newsock->fd); + DBG("Relay data connection accepted, socket %d", + newsock->fd); } else { assert(pollfd == control_sock->fd); + new_conn->type = RELAY_CONTROL; newsock = control_sock->ops->accept(control_sock); - if (!newsock) { - PERROR("accepting control sock"); - free(relay_cmd); - goto error; - } - relay_cmd->type = RELAY_CONTROL; - DBG("Relay control connection accepted, socket %d", newsock->fd); + DBG("Relay control connection accepted, socket %d", + newsock->fd); } - ret = setsockopt(newsock->fd, SOL_SOCKET, SO_REUSEADDR, - &val, sizeof(int)); + if (!newsock) { + PERROR("accepting sock"); + connection_free(new_conn); + goto error; + } + + ret = setsockopt(newsock->fd, SOL_SOCKET, SO_REUSEADDR, &val, + sizeof(val)); if (ret < 0) { PERROR("setsockopt inet"); lttcomm_destroy_sock(newsock); - free(relay_cmd); + connection_free(new_conn); goto error; } - relay_cmd->sock = newsock; - /* - * Lock free enqueue the request. - */ - cds_wfq_enqueue(&relay_cmd_queue.queue, &relay_cmd->node); + new_conn->sock = newsock; + + /* Enqueue request for the dispatcher thread. */ + cds_wfcq_enqueue(&relay_conn_queue.head, &relay_conn_queue.tail, + &new_conn->qnode); /* - * Wake the dispatch queue futex. Implicit memory - * barrier with the exchange in cds_wfq_enqueue. + * Wake the dispatch queue futex. Implicit memory barrier with + * the exchange in cds_wfcq_enqueue. */ - futex_nto1_wake(&relay_cmd_queue.futex); + futex_nto1_wake(&relay_conn_queue.futex); } } } @@ -918,7 +966,7 @@ error_sock_control: } health_unregister(health_relayd); DBG("Relay listener thread cleanup complete"); - stop_threads(); + lttng_relay_stop_threads(); return NULL; } @@ -930,8 +978,8 @@ void *relay_thread_dispatcher(void *data) { int err = -1; ssize_t ret; - struct cds_wfq_node *node; - struct relay_command *relay_cmd = NULL; + struct cds_wfcq_node *node; + struct relay_connection *new_conn = NULL; DBG("[thread] Relay dispatcher started"); @@ -947,39 +995,39 @@ void *relay_thread_dispatcher(void *data) health_code_update(); /* Atomically prepare the queue futex */ - futex_nto1_prepare(&relay_cmd_queue.futex); + futex_nto1_prepare(&relay_conn_queue.futex); do { health_code_update(); /* Dequeue commands */ - node = cds_wfq_dequeue_blocking(&relay_cmd_queue.queue); + node = cds_wfcq_dequeue_blocking(&relay_conn_queue.head, + &relay_conn_queue.tail); if (node == NULL) { DBG("Woken up but nothing in the relay command queue"); /* Continue thread execution */ break; } + new_conn = caa_container_of(node, struct relay_connection, qnode); - relay_cmd = caa_container_of(node, struct relay_command, node); - DBG("Dispatching request waiting on sock %d", relay_cmd->sock->fd); + DBG("Dispatching request waiting on sock %d", new_conn->sock->fd); /* - * Inform worker thread of the new request. This - * call is blocking so we can be assured that the data will be read - * at some point in time or wait to the end of the world :) + * Inform worker thread of the new request. This call is blocking + * so we can be assured that the data will be read at some point in + * time or wait to the end of the world :) */ - ret = lttng_write(relay_cmd_pipe[1], relay_cmd, - sizeof(struct relay_command)); - free(relay_cmd); - if (ret < sizeof(struct relay_command)) { - PERROR("write cmd pipe"); + ret = lttng_write(relay_conn_pipe[1], &new_conn, sizeof(new_conn)); + if (ret < 0) { + PERROR("write connection pipe"); + connection_destroy(new_conn); goto error; } } while (node != NULL); /* Futex wait on queue. Blocking call on futex() */ health_poll_entry(); - futex_nto1_wait(&relay_cmd_queue.futex); + futex_nto1_wait(&relay_conn_queue.futex); health_poll_exit(); } @@ -994,7 +1042,7 @@ error_testpoint: } health_unregister(health_relayd); DBG("Dispatch thread dying"); - stop_threads(); + lttng_relay_stop_threads(); return NULL; } @@ -1026,23 +1074,22 @@ static void try_close_streams(struct relay_session *session) /* * Try to destroy a session within a connection. */ -static -void relay_delete_session(struct relay_command *cmd, +static void destroy_session(struct relay_session *session, struct lttng_ht *sessions_ht) { - assert(cmd); + assert(session); assert(sessions_ht); /* Indicate that this session can be destroyed from now on. */ - cmd->session->close_flag = 1; + session->close_flag = 1; - try_close_streams(cmd->session); + try_close_streams(session); /* * This will try to delete and destroy the session if no viewer is attached * to it meaning the refcount is down to zero. */ - session_try_destroy(sessions_ht, cmd->session); + session_try_destroy(sessions_ht, session); } /* @@ -1074,15 +1121,14 @@ static void copy_index_control_data(struct relay_index *index, */ static int relay_create_session(struct lttcomm_relayd_hdr *recv_hdr, - struct relay_command *cmd, - struct lttng_ht *sessions_ht) + struct relay_connection *conn) { int ret = 0, send_ret; struct relay_session *session; struct lttcomm_relayd_status_session reply; assert(recv_hdr); - assert(cmd); + assert(conn); memset(&reply, 0, sizeof(reply)); @@ -1091,25 +1137,24 @@ int relay_create_session(struct lttcomm_relayd_hdr *recv_hdr, ret = -1; goto error; } - session->minor = cmd->minor; - session->major = cmd->major; - cmd->session_id = session->id; - cmd->session = session; + session->minor = conn->minor; + session->major = conn->major; + conn->session_id = session->id; + conn->session = session; reply.session_id = htobe64(session->id); - switch (cmd->minor) { + switch (conn->minor) { case 1: case 2: case 3: break; case 4: /* LTTng sessiond 2.4 */ default: - ret = cmd_create_session_2_4(cmd, session); - break; + ret = cmd_create_session_2_4(conn, session); } - lttng_ht_add_unique_u64(sessions_ht, &session->session_n); + lttng_ht_add_unique_u64(conn->sessions_ht, &session->session_n); DBG("Created session %" PRIu64, session->id); error: @@ -1119,7 +1164,7 @@ error: reply.ret_code = htobe32(LTTNG_OK); } - send_ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0); + send_ret = conn->sock->ops->sendmsg(conn->sock, &reply, sizeof(reply), 0); if (send_ret < 0) { ERR("Relayd sending session id"); ret = send_ret; @@ -1133,17 +1178,17 @@ error: * we make them visible to the viewer threads. */ static -void set_viewer_ready_flag(struct relay_command *cmd) +void set_viewer_ready_flag(struct relay_connection *conn) { struct relay_stream *stream, *tmp_stream; - pthread_mutex_lock(&cmd->session->viewer_ready_lock); - cds_list_for_each_entry_safe(stream, tmp_stream, &cmd->recv_head, + pthread_mutex_lock(&conn->session->viewer_ready_lock); + cds_list_for_each_entry_safe(stream, tmp_stream, &conn->recv_head, recv_list) { stream->viewer_ready = 1; cds_list_del(&stream->recv_list); } - pthread_mutex_unlock(&cmd->session->viewer_ready_lock); + pthread_mutex_unlock(&conn->session->viewer_ready_lock); return; } @@ -1152,12 +1197,13 @@ void set_viewer_ready_flag(struct relay_command *cmd) * handle. A new node is allocated thus must be freed when the node is deleted * from the list. */ -static void queue_stream(struct relay_stream *stream, struct relay_command *cmd) +static void queue_stream(struct relay_stream *stream, + struct relay_connection *conn) { - assert(cmd); + assert(conn); assert(stream); - cds_list_add(&stream->recv_list, &cmd->recv_head); + cds_list_add(&stream->recv_list, &conn->recv_head); } /* @@ -1165,15 +1211,15 @@ static void queue_stream(struct relay_stream *stream, struct relay_command *cmd) */ static int relay_add_stream(struct lttcomm_relayd_hdr *recv_hdr, - struct relay_command *cmd, struct lttng_ht *sessions_ht) + struct relay_connection *conn) { int ret, send_ret; - struct relay_session *session = cmd->session; + struct relay_session *session = conn->session; struct relay_stream *stream = NULL; struct lttcomm_relayd_status_stream reply; struct ctf_trace *trace; - if (!session || cmd->version_check_done == 0) { + if (!session || conn->version_check_done == 0) { ERR("Trying to add a stream before version check"); ret = -1; goto end_no_session; @@ -1186,13 +1232,13 @@ int relay_add_stream(struct lttcomm_relayd_hdr *recv_hdr, goto end_no_session; } - switch (cmd->minor) { + switch (conn->minor) { case 1: /* LTTng sessiond 2.1 */ - ret = cmd_recv_stream_2_1(cmd, stream); + ret = cmd_recv_stream_2_1(conn, stream); break; case 2: /* LTTng sessiond 2.2 */ default: - ret = cmd_recv_stream_2_2(cmd, stream); + ret = cmd_recv_stream_2_2(conn, stream); break; } if (ret < 0) { @@ -1205,6 +1251,7 @@ int relay_add_stream(struct lttcomm_relayd_hdr *recv_hdr, stream->session_id = session->id; stream->index_fd = -1; stream->read_index_fd = -1; + stream->ctf_stream_id = -1ULL; lttng_ht_node_init_u64(&stream->node, stream->stream_handle); pthread_mutex_init(&stream->lock, NULL); @@ -1253,7 +1300,7 @@ int relay_add_stream(struct lttcomm_relayd_hdr *recv_hdr, * message is received, this list is emptied and streams are set with the * viewer ready flag. */ - queue_stream(stream, cmd); + queue_stream(stream, conn); /* * Both in the ctf_trace object and the global stream ht since the data @@ -1268,6 +1315,7 @@ int relay_add_stream(struct lttcomm_relayd_hdr *recv_hdr, stream->stream_handle); end: + memset(&reply, 0, sizeof(reply)); reply.handle = htobe64(stream->stream_handle); /* send the session id to the client or a negative return code on error */ if (ret < 0) { @@ -1278,7 +1326,7 @@ end: reply.ret_code = htobe32(LTTNG_OK); } - send_ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, + send_ret = conn->sock->ops->sendmsg(conn->sock, &reply, sizeof(struct lttcomm_relayd_status_stream), 0); if (send_ret < 0) { ERR("Relay sending stream id"); @@ -1301,28 +1349,28 @@ err_free_stream: */ static int relay_close_stream(struct lttcomm_relayd_hdr *recv_hdr, - struct relay_command *cmd) + struct relay_connection *conn) { int ret, send_ret; - struct relay_session *session = cmd->session; + struct relay_session *session = conn->session; struct lttcomm_relayd_close_stream stream_info; struct lttcomm_relayd_generic_reply reply; struct relay_stream *stream; DBG("Close stream received"); - if (!session || cmd->version_check_done == 0) { + if (!session || conn->version_check_done == 0) { ERR("Trying to close a stream before version check"); ret = -1; goto end_no_session; } - ret = cmd->sock->ops->recvmsg(cmd->sock, &stream_info, + ret = conn->sock->ops->recvmsg(conn->sock, &stream_info, sizeof(struct lttcomm_relayd_close_stream), 0); if (ret < sizeof(struct lttcomm_relayd_close_stream)) { if (ret == 0) { /* Orderly shutdown. Not necessary to print an error. */ - DBG("Socket %d did an orderly shutdown", cmd->sock->fd); + DBG("Socket %d did an orderly shutdown", conn->sock->fd); } else { ERR("Relay didn't receive valid add_stream struct size : %d", ret); } @@ -1341,7 +1389,6 @@ int relay_close_stream(struct lttcomm_relayd_hdr *recv_hdr, stream->last_net_seq_num = be64toh(stream_info.last_net_seq_num); stream->close_flag = 1; session->stream_count--; - assert(session->stream_count >= 0); /* Check if we can close it or else the data will do it. */ try_close_stream(session, stream); @@ -1349,12 +1396,13 @@ int relay_close_stream(struct lttcomm_relayd_hdr *recv_hdr, end_unlock: rcu_read_unlock(); + memset(&reply, 0, sizeof(reply)); if (ret < 0) { reply.ret_code = htobe32(LTTNG_ERR_UNK); } else { reply.ret_code = htobe32(LTTNG_OK); } - send_ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, + send_ret = conn->sock->ops->sendmsg(conn->sock, &reply, sizeof(struct lttcomm_relayd_generic_reply), 0); if (send_ret < 0) { ERR("Relay sending stream id"); @@ -1369,13 +1417,14 @@ end_no_session: * relay_unknown_command: send -1 if received unknown command */ static -void relay_unknown_command(struct relay_command *cmd) +void relay_unknown_command(struct relay_connection *conn) { struct lttcomm_relayd_generic_reply reply; int ret; + memset(&reply, 0, sizeof(reply)); reply.ret_code = htobe32(LTTNG_ERR_UNK); - ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, + ret = conn->sock->ops->sendmsg(conn->sock, &reply, sizeof(struct lttcomm_relayd_generic_reply), 0); if (ret < 0) { ERR("Relay sending unknown command"); @@ -1388,19 +1437,20 @@ void relay_unknown_command(struct relay_command *cmd) */ static int relay_start(struct lttcomm_relayd_hdr *recv_hdr, - struct relay_command *cmd) + struct relay_connection *conn) { int ret = htobe32(LTTNG_OK); struct lttcomm_relayd_generic_reply reply; - struct relay_session *session = cmd->session; + struct relay_session *session = conn->session; if (!session) { DBG("Trying to start the streaming without a session established"); ret = htobe32(LTTNG_ERR_UNK); } + memset(&reply, 0, sizeof(reply)); reply.ret_code = ret; - ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, + ret = conn->sock->ops->sendmsg(conn->sock, &reply, sizeof(struct lttcomm_relayd_generic_reply), 0); if (ret < 0) { ERR("Relay sending start ack"); @@ -1444,11 +1494,11 @@ end: */ static int relay_recv_metadata(struct lttcomm_relayd_hdr *recv_hdr, - struct relay_command *cmd) + struct relay_connection *conn) { int ret = htobe32(LTTNG_OK); ssize_t size_ret; - struct relay_session *session = cmd->session; + struct relay_session *session = conn->session; struct lttcomm_relayd_metadata_payload *metadata_struct; struct relay_stream *metadata_stream; uint64_t data_size, payload_size; @@ -1484,11 +1534,11 @@ int relay_recv_metadata(struct lttcomm_relayd_hdr *recv_hdr, } memset(data_buffer, 0, data_size); DBG2("Relay receiving metadata, waiting for %" PRIu64 " bytes", data_size); - ret = cmd->sock->ops->recvmsg(cmd->sock, data_buffer, data_size, 0); + ret = conn->sock->ops->recvmsg(conn->sock, data_buffer, data_size, 0); if (ret < 0 || ret != data_size) { if (ret == 0) { /* Orderly shutdown. Not necessary to print an error. */ - DBG("Socket %d did an orderly shutdown", cmd->sock->fd); + DBG("Socket %d did an orderly shutdown", conn->sock->fd); } else { ERR("Relay didn't receive the whole metadata"); } @@ -1538,21 +1588,21 @@ end: */ static int relay_send_version(struct lttcomm_relayd_hdr *recv_hdr, - struct relay_command *cmd, struct lttng_ht *sessions_ht) + struct relay_connection *conn) { int ret; struct lttcomm_relayd_version reply, msg; - assert(cmd); + assert(conn); - cmd->version_check_done = 1; + conn->version_check_done = 1; /* Get version from the other side. */ - ret = cmd->sock->ops->recvmsg(cmd->sock, &msg, sizeof(msg), 0); + ret = conn->sock->ops->recvmsg(conn->sock, &msg, sizeof(msg), 0); if (ret < 0 || ret != sizeof(msg)) { if (ret == 0) { /* Orderly shutdown. Not necessary to print an error. */ - DBG("Socket %d did an orderly shutdown", cmd->sock->fd); + DBG("Socket %d did an orderly shutdown", conn->sock->fd); } else { ERR("Relay failed to receive the version values."); } @@ -1560,6 +1610,7 @@ int relay_send_version(struct lttcomm_relayd_hdr *recv_hdr, goto end; } + memset(&reply, 0, sizeof(reply)); reply.major = RELAYD_VERSION_COMM_MAJOR; reply.minor = RELAYD_VERSION_COMM_MINOR; @@ -1567,29 +1618,29 @@ int relay_send_version(struct lttcomm_relayd_hdr *recv_hdr, if (reply.major != be32toh(msg.major)) { DBG("Incompatible major versions (%u vs %u), deleting session", reply.major, be32toh(msg.major)); - relay_delete_session(cmd, sessions_ht); + destroy_session(conn->session, conn->sessions_ht); ret = 0; goto end; } - cmd->major = reply.major; + conn->major = reply.major; /* We adapt to the lowest compatible version */ if (reply.minor <= be32toh(msg.minor)) { - cmd->minor = reply.minor; + conn->minor = reply.minor; } else { - cmd->minor = be32toh(msg.minor); + conn->minor = be32toh(msg.minor); } reply.major = htobe32(reply.major); reply.minor = htobe32(reply.minor); - ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, + ret = conn->sock->ops->sendmsg(conn->sock, &reply, sizeof(struct lttcomm_relayd_version), 0); if (ret < 0) { ERR("Relay sending version"); } - DBG("Version check done using protocol %u.%u", cmd->major, - cmd->minor); + DBG("Version check done using protocol %u.%u", conn->major, + conn->minor); end: return ret; @@ -1600,9 +1651,9 @@ end: */ static int relay_data_pending(struct lttcomm_relayd_hdr *recv_hdr, - struct relay_command *cmd) + struct relay_connection *conn) { - struct relay_session *session = cmd->session; + struct relay_session *session = conn->session; struct lttcomm_relayd_data_pending msg; struct lttcomm_relayd_generic_reply reply; struct relay_stream *stream; @@ -1611,17 +1662,17 @@ int relay_data_pending(struct lttcomm_relayd_hdr *recv_hdr, DBG("Data pending command received"); - if (!session || cmd->version_check_done == 0) { + if (!session || conn->version_check_done == 0) { ERR("Trying to check for data before version check"); ret = -1; goto end_no_session; } - ret = cmd->sock->ops->recvmsg(cmd->sock, &msg, sizeof(msg), 0); + ret = conn->sock->ops->recvmsg(conn->sock, &msg, sizeof(msg), 0); if (ret < sizeof(msg)) { if (ret == 0) { /* Orderly shutdown. Not necessary to print an error. */ - DBG("Socket %d did an orderly shutdown", cmd->sock->fd); + DBG("Socket %d did an orderly shutdown", conn->sock->fd); } else { ERR("Relay didn't receive valid data_pending struct size : %d", ret); @@ -1659,8 +1710,9 @@ int relay_data_pending(struct lttcomm_relayd_hdr *recv_hdr, end_unlock: rcu_read_unlock(); + memset(&reply, 0, sizeof(reply)); reply.ret_code = htobe32(ret); - ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0); + ret = conn->sock->ops->sendmsg(conn->sock, &reply, sizeof(reply), 0); if (ret < 0) { ERR("Relay data pending ret code failed"); } @@ -1678,7 +1730,7 @@ end_no_session: */ static int relay_quiescent_control(struct lttcomm_relayd_hdr *recv_hdr, - struct relay_command *cmd) + struct relay_connection *conn) { int ret; uint64_t stream_id; @@ -1689,17 +1741,17 @@ int relay_quiescent_control(struct lttcomm_relayd_hdr *recv_hdr, DBG("Checking quiescent state on control socket"); - if (!cmd->session || cmd->version_check_done == 0) { + if (!conn->session || conn->version_check_done == 0) { ERR("Trying to check for data before version check"); ret = -1; goto end_no_session; } - ret = cmd->sock->ops->recvmsg(cmd->sock, &msg, sizeof(msg), 0); + ret = conn->sock->ops->recvmsg(conn->sock, &msg, sizeof(msg), 0); if (ret < sizeof(msg)) { if (ret == 0) { /* Orderly shutdown. Not necessary to print an error. */ - DBG("Socket %d did an orderly shutdown", cmd->sock->fd); + DBG("Socket %d did an orderly shutdown", conn->sock->fd); } else { ERR("Relay didn't receive valid begin data_pending struct size: %d", ret); @@ -1722,8 +1774,9 @@ int relay_quiescent_control(struct lttcomm_relayd_hdr *recv_hdr, } rcu_read_unlock(); + memset(&reply, 0, sizeof(reply)); reply.ret_code = htobe32(LTTNG_OK); - ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0); + ret = conn->sock->ops->sendmsg(conn->sock, &reply, sizeof(reply), 0); if (ret < 0) { ERR("Relay data quiescent control ret code failed"); } @@ -1741,7 +1794,7 @@ end_no_session: */ static int relay_begin_data_pending(struct lttcomm_relayd_hdr *recv_hdr, - struct relay_command *cmd) + struct relay_connection *conn) { int ret; struct lttng_ht_iter iter; @@ -1751,21 +1804,21 @@ int relay_begin_data_pending(struct lttcomm_relayd_hdr *recv_hdr, uint64_t session_id; assert(recv_hdr); - assert(cmd); + assert(conn); DBG("Init streams for data pending"); - if (!cmd->session || cmd->version_check_done == 0) { + if (!conn->session || conn->version_check_done == 0) { ERR("Trying to check for data before version check"); ret = -1; goto end_no_session; } - ret = cmd->sock->ops->recvmsg(cmd->sock, &msg, sizeof(msg), 0); + ret = conn->sock->ops->recvmsg(conn->sock, &msg, sizeof(msg), 0); if (ret < sizeof(msg)) { if (ret == 0) { /* Orderly shutdown. Not necessary to print an error. */ - DBG("Socket %d did an orderly shutdown", cmd->sock->fd); + DBG("Socket %d did an orderly shutdown", conn->sock->fd); } else { ERR("Relay didn't receive valid begin data_pending struct size: %d", ret); @@ -1792,10 +1845,11 @@ int relay_begin_data_pending(struct lttcomm_relayd_hdr *recv_hdr, } rcu_read_unlock(); + memset(&reply, 0, sizeof(reply)); /* All good, send back reply. */ reply.ret_code = htobe32(LTTNG_OK); - ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0); + ret = conn->sock->ops->sendmsg(conn->sock, &reply, sizeof(reply), 0); if (ret < 0) { ERR("Relay begin data pending send reply failed"); } @@ -1815,7 +1869,7 @@ end_no_session: */ static int relay_end_data_pending(struct lttcomm_relayd_hdr *recv_hdr, - struct relay_command *cmd) + struct relay_connection *conn) { int ret; struct lttng_ht_iter iter; @@ -1826,21 +1880,21 @@ int relay_end_data_pending(struct lttcomm_relayd_hdr *recv_hdr, uint32_t is_data_inflight = 0; assert(recv_hdr); - assert(cmd); + assert(conn); DBG("End data pending command"); - if (!cmd->session || cmd->version_check_done == 0) { + if (!conn->session || conn->version_check_done == 0) { ERR("Trying to check for data before version check"); ret = -1; goto end_no_session; } - ret = cmd->sock->ops->recvmsg(cmd->sock, &msg, sizeof(msg), 0); + ret = conn->sock->ops->recvmsg(conn->sock, &msg, sizeof(msg), 0); if (ret < sizeof(msg)) { if (ret == 0) { /* Orderly shutdown. Not necessary to print an error. */ - DBG("Socket %d did an orderly shutdown", cmd->sock->fd); + DBG("Socket %d did an orderly shutdown", conn->sock->fd); } else { ERR("Relay didn't receive valid end data_pending struct size: %d", ret); @@ -1856,7 +1910,7 @@ int relay_end_data_pending(struct lttcomm_relayd_hdr *recv_hdr, cds_lfht_for_each_entry(relay_streams_ht->ht, &iter.iter, stream, node.node) { if (stream->session_id == session_id && - !stream->data_pending_check_done) { + !stream->data_pending_check_done && !stream->terminated_flag) { is_data_inflight = 1; DBG("Data is still in flight for stream %" PRIu64, stream->stream_handle); @@ -1865,10 +1919,11 @@ int relay_end_data_pending(struct lttcomm_relayd_hdr *recv_hdr, } rcu_read_unlock(); + memset(&reply, 0, sizeof(reply)); /* All good, send back reply. */ reply.ret_code = htobe32(is_data_inflight); - ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0); + ret = conn->sock->ops->sendmsg(conn->sock, &reply, sizeof(reply), 0); if (ret < 0) { ERR("Relay end data pending send reply failed"); } @@ -1884,32 +1939,32 @@ end_no_session: */ static int relay_recv_index(struct lttcomm_relayd_hdr *recv_hdr, - struct relay_command *cmd) + struct relay_connection *conn) { int ret, send_ret, index_created = 0; - struct relay_session *session = cmd->session; + struct relay_session *session = conn->session; struct lttcomm_relayd_index index_info; struct relay_index *index, *wr_index = NULL; struct lttcomm_relayd_generic_reply reply; struct relay_stream *stream; uint64_t net_seq_num; - assert(cmd); + assert(conn); DBG("Relay receiving index"); - if (!session || cmd->version_check_done == 0) { + if (!session || conn->version_check_done == 0) { ERR("Trying to close a stream before version check"); ret = -1; goto end_no_session; } - ret = cmd->sock->ops->recvmsg(cmd->sock, &index_info, + ret = conn->sock->ops->recvmsg(conn->sock, &index_info, sizeof(index_info), 0); if (ret < sizeof(index_info)) { if (ret == 0) { /* Orderly shutdown. Not necessary to print an error. */ - DBG("Socket %d did an orderly shutdown", cmd->sock->fd); + DBG("Socket %d did an orderly shutdown", conn->sock->fd); } else { ERR("Relay didn't receive valid index struct size : %d", ret); } @@ -1932,9 +1987,10 @@ int relay_recv_index(struct lttcomm_relayd_hdr *recv_hdr, DBG("Received live beacon for stream %" PRIu64, stream->stream_handle); /* - * Only flag a stream inactive when it has already received data. + * Only flag a stream inactive when it has already received data + * and no indexes are in flight. */ - if (stream->total_index_received > 0) { + if (stream->total_index_received > 0 && stream->indexes_in_flight == 0) { stream->beacon_ts_end = be64toh(index_info.timestamp_end); } ret = 0; @@ -1951,9 +2007,13 @@ int relay_recv_index(struct lttcomm_relayd_hdr *recv_hdr, goto end_rcu_unlock; } index_created = 1; + stream->indexes_in_flight++; } copy_index_control_data(index, &index_info); + if (stream->ctf_stream_id == -1ULL) { + stream->ctf_stream_id = be64toh(index_info.stream_id); + } if (index_created) { /* @@ -1973,33 +2033,25 @@ int relay_recv_index(struct lttcomm_relayd_hdr *recv_hdr, /* Do we have a writable ready index to write on disk. */ if (wr_index) { - /* Starting at 2.4, create the index file if none available. */ - if (cmd->minor >= 4 && stream->index_fd < 0) { - ret = index_create_file(stream->path_name, stream->channel_name, - relayd_uid, relayd_gid, stream->tracefile_size, - stream->tracefile_count_current); - if (ret < 0) { - goto end_rcu_unlock; - } - stream->index_fd = ret; - } - ret = relay_index_write(wr_index->fd, wr_index); if (ret < 0) { goto end_rcu_unlock; } stream->total_index_received++; + stream->indexes_in_flight--; + assert(stream->indexes_in_flight >= 0); } end_rcu_unlock: rcu_read_unlock(); + memset(&reply, 0, sizeof(reply)); if (ret < 0) { reply.ret_code = htobe32(LTTNG_ERR_UNK); } else { reply.ret_code = htobe32(LTTNG_OK); } - send_ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0); + send_ret = conn->sock->ops->sendmsg(conn->sock, &reply, sizeof(reply), 0); if (send_ret < 0) { ERR("Relay sending close index id reply"); ret = send_ret; @@ -2016,16 +2068,16 @@ end_no_session: */ static int relay_streams_sent(struct lttcomm_relayd_hdr *recv_hdr, - struct relay_command *cmd) + struct relay_connection *conn) { int ret, send_ret; struct lttcomm_relayd_generic_reply reply; - assert(cmd); + assert(conn); DBG("Relay receiving streams_sent"); - if (!cmd->session || cmd->version_check_done == 0) { + if (!conn->session || conn->version_check_done == 0) { ERR("Trying to close a stream before version check"); ret = -1; goto end_no_session; @@ -2035,15 +2087,18 @@ int relay_streams_sent(struct lttcomm_relayd_hdr *recv_hdr, * Flag every pending stream in the connection recv list that they are * ready to be used by the viewer. */ - set_viewer_ready_flag(cmd); + set_viewer_ready_flag(conn); /* * Inform the viewer that there are new streams in the session. */ - uatomic_set(&cmd->session->new_streams, 1); + if (conn->session->viewer_refcount) { + uatomic_set(&conn->session->new_streams, 1); + } + memset(&reply, 0, sizeof(reply)); reply.ret_code = htobe32(LTTNG_OK); - send_ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0); + send_ret = conn->sock->ops->sendmsg(conn->sock, &reply, sizeof(reply), 0); if (send_ret < 0) { ERR("Relay sending sent_stream reply"); ret = send_ret; @@ -2061,51 +2116,51 @@ end_no_session: */ static int relay_process_control(struct lttcomm_relayd_hdr *recv_hdr, - struct relay_command *cmd, struct relay_local_data *ctx) + struct relay_connection *conn) { int ret = 0; switch (be32toh(recv_hdr->cmd)) { case RELAYD_CREATE_SESSION: - ret = relay_create_session(recv_hdr, cmd, ctx->sessions_ht); + ret = relay_create_session(recv_hdr, conn); break; case RELAYD_ADD_STREAM: - ret = relay_add_stream(recv_hdr, cmd, ctx->sessions_ht); + ret = relay_add_stream(recv_hdr, conn); break; case RELAYD_START_DATA: - ret = relay_start(recv_hdr, cmd); + ret = relay_start(recv_hdr, conn); break; case RELAYD_SEND_METADATA: - ret = relay_recv_metadata(recv_hdr, cmd); + ret = relay_recv_metadata(recv_hdr, conn); break; case RELAYD_VERSION: - ret = relay_send_version(recv_hdr, cmd, ctx->sessions_ht); + ret = relay_send_version(recv_hdr, conn); break; case RELAYD_CLOSE_STREAM: - ret = relay_close_stream(recv_hdr, cmd); + ret = relay_close_stream(recv_hdr, conn); break; case RELAYD_DATA_PENDING: - ret = relay_data_pending(recv_hdr, cmd); + ret = relay_data_pending(recv_hdr, conn); break; case RELAYD_QUIESCENT_CONTROL: - ret = relay_quiescent_control(recv_hdr, cmd); + ret = relay_quiescent_control(recv_hdr, conn); break; case RELAYD_BEGIN_DATA_PENDING: - ret = relay_begin_data_pending(recv_hdr, cmd); + ret = relay_begin_data_pending(recv_hdr, conn); break; case RELAYD_END_DATA_PENDING: - ret = relay_end_data_pending(recv_hdr, cmd); + ret = relay_end_data_pending(recv_hdr, conn); break; case RELAYD_SEND_INDEX: - ret = relay_recv_index(recv_hdr, cmd); + ret = relay_recv_index(recv_hdr, conn); break; case RELAYD_STREAMS_SENT: - ret = relay_streams_sent(recv_hdr, cmd); + ret = relay_streams_sent(recv_hdr, conn); break; case RELAYD_UPDATE_SYNC_INFO: default: ERR("Received unknown command (%u)", be32toh(recv_hdr->cmd)); - relay_unknown_command(cmd); + relay_unknown_command(conn); ret = -1; goto end; } @@ -2148,6 +2203,7 @@ static int handle_index_data(struct relay_stream *stream, uint64_t net_seq_num, goto error; } index_created = 1; + stream->indexes_in_flight++; } if (rotate_index || stream->index_fd < 0) { @@ -2190,6 +2246,8 @@ static int handle_index_data(struct relay_stream *stream, uint64_t net_seq_num, goto error; } stream->total_index_received++; + stream->indexes_in_flight--; + assert(stream->indexes_in_flight >= 0); } error: @@ -2200,7 +2258,7 @@ error: * relay_process_data: Process the data received on the data socket */ static -int relay_process_data(struct relay_command *cmd, struct lttng_ht *sessions_ht) +int relay_process_data(struct relay_connection *conn) { int ret = 0, rotate_index = 0; ssize_t size_ret; @@ -2211,14 +2269,16 @@ int relay_process_data(struct relay_command *cmd, struct lttng_ht *sessions_ht) uint32_t data_size; struct relay_session *session; - ret = cmd->sock->ops->recvmsg(cmd->sock, &data_hdr, + assert(conn); + + ret = conn->sock->ops->recvmsg(conn->sock, &data_hdr, sizeof(struct lttcomm_relayd_data_hdr), 0); if (ret <= 0) { if (ret == 0) { /* Orderly shutdown. Not necessary to print an error. */ - DBG("Socket %d did an orderly shutdown", cmd->sock->fd); + DBG("Socket %d did an orderly shutdown", conn->sock->fd); } else { - ERR("Unable to receive data header on sock %d", cmd->sock->fd); + ERR("Unable to receive data header on sock %d", conn->sock->fd); } ret = -1; goto end; @@ -2233,7 +2293,7 @@ int relay_process_data(struct relay_command *cmd, struct lttng_ht *sessions_ht) goto end_rcu_unlock; } - session = session_find_by_id(sessions_ht, stream->session_id); + session = session_find_by_id(conn->sessions_ht, stream->session_id); assert(session); data_size = be32toh(data_hdr.data_size); @@ -2256,11 +2316,11 @@ int relay_process_data(struct relay_command *cmd, struct lttng_ht *sessions_ht) DBG3("Receiving data of size %u for stream id %" PRIu64 " seqnum %" PRIu64, data_size, stream_id, net_seq_num); - ret = cmd->sock->ops->recvmsg(cmd->sock, data_buffer, data_size, 0); + ret = conn->sock->ops->recvmsg(conn->sock, data_buffer, data_size, 0); if (ret <= 0) { if (ret == 0) { /* Orderly shutdown. Not necessary to print an error. */ - DBG("Socket %d did an orderly shutdown", cmd->sock->fd); + DBG("Socket %d did an orderly shutdown", conn->sock->fd); } ret = -1; goto end_rcu_unlock; @@ -2299,7 +2359,7 @@ int relay_process_data(struct relay_command *cmd, struct lttng_ht *sessions_ht) pthread_mutex_lock(&vstream->overwrite_lock); vstream->abort_flag = 1; pthread_mutex_unlock(&vstream->overwrite_lock); - DBG("Streaming side setting abort_flag on stream %s_%lu\n", + DBG("Streaming side setting abort_flag on stream %s_%" PRIu64 "\n", stream->channel_name, new_id); } else if (vstream->tracefile_count_current == stream->tracefile_count_current) { @@ -2316,7 +2376,6 @@ int relay_process_data(struct relay_command *cmd, struct lttng_ht *sessions_ht) stream->tracefile_size, stream->tracefile_count, relayd_uid, relayd_gid, stream->fd, &(stream->tracefile_count_current), &stream->fd); - stream->total_index_received = 0; pthread_mutex_unlock(&stream->viewer_stream_rotation_lock); if (ret < 0) { ERR("Rotating stream output file"); @@ -2366,11 +2425,13 @@ end: } static -void relay_cleanup_poll_connection(struct lttng_poll_event *events, int pollfd) +void cleanup_connection_pollfd(struct lttng_poll_event *events, int pollfd) { int ret; - lttng_poll_del(events, pollfd); + assert(events); + + (void) lttng_poll_del(events, pollfd); ret = close(pollfd); if (ret < 0) { @@ -2378,75 +2439,20 @@ void relay_cleanup_poll_connection(struct lttng_poll_event *events, int pollfd) } } -static -int relay_add_connection(int fd, struct lttng_poll_event *events, - struct lttng_ht *relay_connections_ht) -{ - struct relay_command *relay_connection; - ssize_t ret; - - relay_connection = zmalloc(sizeof(struct relay_command)); - if (relay_connection == NULL) { - PERROR("Relay command zmalloc"); - goto error; - } - ret = lttng_read(fd, relay_connection, sizeof(struct relay_command)); - if (ret < sizeof(struct relay_command)) { - PERROR("read relay cmd pipe"); - goto error_read; - } - CDS_INIT_LIST_HEAD(&relay_connection->recv_head); - - lttng_ht_node_init_ulong(&relay_connection->sock_n, - (unsigned long) relay_connection->sock->fd); - rcu_read_lock(); - lttng_ht_add_unique_ulong(relay_connections_ht, - &relay_connection->sock_n); - rcu_read_unlock(); - return lttng_poll_add(events, - relay_connection->sock->fd, - LPOLLIN | LPOLLRDHUP); - -error_read: - free(relay_connection); -error: - return -1; -} - -static -void deferred_free_connection(struct rcu_head *head) +static void destroy_connection(struct lttng_ht *relay_connections_ht, + struct relay_connection *conn) { - struct relay_command *relay_connection = - caa_container_of(head, struct relay_command, rcu_node); + assert(relay_connections_ht); + assert(conn); - lttcomm_destroy_sock(relay_connection->sock); - free(relay_connection); -} - -static -void relay_del_connection(struct lttng_ht *relay_connections_ht, - struct lttng_ht_iter *iter, struct relay_command *relay_connection, - struct lttng_ht *sessions_ht) -{ - int ret; - - ret = lttng_ht_del(relay_connections_ht, iter); - assert(!ret); - - if (relay_connection->type == RELAY_CONTROL) { - struct relay_stream *stream, *tmp_stream; - - /* Clean up recv list. */ - cds_list_for_each_entry_safe(stream, tmp_stream, - &relay_connection->recv_head, recv_list) { - cds_list_del(&stream->recv_list); - } - - relay_delete_session(relay_connection, sessions_ht); + connection_delete(relay_connections_ht, conn); + /* For the control socket, we try to destroy the session. */ + if (conn->type == RELAY_CONTROL && conn->session) { + destroy_session(conn->session, conn->sessions_ht); } - call_rcu(&relay_connection->rcu_node, deferred_free_connection); + connection_destroy(conn); } /* @@ -2457,14 +2463,14 @@ void *relay_thread_worker(void *data) { int ret, err = -1, last_seen_data_fd = -1; uint32_t nb_fd; - struct relay_command *relay_connection; + struct relay_connection *conn; struct lttng_poll_event events; struct lttng_ht *relay_connections_ht; - struct lttng_ht_node_ulong *node; struct lttng_ht_iter iter; struct lttcomm_relayd_hdr recv_hdr; struct relay_local_data *relay_ctx = (struct relay_local_data *) data; struct lttng_ht *sessions_ht = relay_ctx->sessions_ht; + struct relay_index *index; DBG("[thread] Relay worker started"); @@ -2495,7 +2501,7 @@ void *relay_thread_worker(void *data) goto error_poll_create; } - ret = lttng_poll_add(&events, relay_cmd_pipe[0], LPOLLIN | LPOLLRDHUP); + ret = lttng_poll_add(&events, relay_conn_pipe[0], LPOLLIN | LPOLLRDHUP); if (ret < 0) { goto error; } @@ -2535,6 +2541,11 @@ restart: health_code_update(); + if (!revents) { + /* No activity for this FD (poll implementation). */ + continue; + } + /* Thread quit pipe has been closed. Killing thread. */ ret = check_thread_quit_pipe(pollfd, revents); if (ret) { @@ -2542,73 +2553,53 @@ restart: goto exit; } - /* Inspect the relay cmd pipe for new connection */ - if (pollfd == relay_cmd_pipe[0]) { + /* Inspect the relay conn pipe for new connection */ + if (pollfd == relay_conn_pipe[0]) { if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) { - ERR("Relay pipe error"); + ERR("Relay connection pipe error"); goto error; } else if (revents & LPOLLIN) { - DBG("Relay command received"); - ret = relay_add_connection(relay_cmd_pipe[0], - &events, relay_connections_ht); + ret = lttng_read(relay_conn_pipe[0], &conn, sizeof(conn)); if (ret < 0) { goto error; } - } - } else if (revents) { - rcu_read_lock(); - lttng_ht_lookup(relay_connections_ht, - (void *)((unsigned long) pollfd), - &iter); - node = lttng_ht_iter_get_node_ulong(&iter); - if (node == NULL) { - DBG2("Relay sock %d not found", pollfd); + conn->sessions_ht = sessions_ht; + connection_init(conn); + lttng_poll_add(&events, conn->sock->fd, + LPOLLIN | LPOLLRDHUP); + rcu_read_lock(); + lttng_ht_add_unique_ulong(relay_connections_ht, + &conn->sock_n); rcu_read_unlock(); - goto error; + DBG("Connection socket %d added", conn->sock->fd); } - relay_connection = caa_container_of(node, - struct relay_command, sock_n); - - if (revents & (LPOLLERR)) { - ERR("POLL ERROR"); - relay_cleanup_poll_connection(&events, pollfd); - relay_del_connection(relay_connections_ht, - &iter, relay_connection, sessions_ht); - if (last_seen_data_fd == pollfd) { - last_seen_data_fd = last_notdel_data_fd; - } - } else if (revents & (LPOLLHUP | LPOLLRDHUP)) { - DBG("Socket %d hung up", pollfd); - relay_cleanup_poll_connection(&events, pollfd); - relay_del_connection(relay_connections_ht, - &iter, relay_connection, sessions_ht); + } else { + rcu_read_lock(); + conn = connection_find_by_sock(relay_connections_ht, pollfd); + /* If not found, there is a synchronization issue. */ + assert(conn); + + if (revents & (LPOLLERR | LPOLLHUP | LPOLLRDHUP)) { + cleanup_connection_pollfd(&events, pollfd); + destroy_connection(relay_connections_ht, conn); if (last_seen_data_fd == pollfd) { last_seen_data_fd = last_notdel_data_fd; } } else if (revents & LPOLLIN) { - /* control socket */ - if (relay_connection->type == RELAY_CONTROL) { - ret = relay_connection->sock->ops->recvmsg( - relay_connection->sock, &recv_hdr, - sizeof(struct lttcomm_relayd_hdr), 0); - /* connection closed */ + if (conn->type == RELAY_CONTROL) { + ret = conn->sock->ops->recvmsg(conn->sock, &recv_hdr, + sizeof(recv_hdr), 0); if (ret <= 0) { - relay_cleanup_poll_connection(&events, pollfd); - relay_del_connection(relay_connections_ht, - &iter, relay_connection, sessions_ht); + /* Connection closed */ + cleanup_connection_pollfd(&events, pollfd); + destroy_connection(relay_connections_ht, conn); DBG("Control connection closed with %d", pollfd); } else { - if (relay_connection->session) { - DBG2("Relay worker receiving data for session : %" PRIu64, - relay_connection->session->id); - } - ret = relay_process_control(&recv_hdr, - relay_connection, relay_ctx); + ret = relay_process_control(&recv_hdr, conn); if (ret < 0) { /* Clear the session on error. */ - relay_cleanup_poll_connection(&events, pollfd); - relay_del_connection(relay_connections_ht, - &iter, relay_connection, sessions_ht); + cleanup_connection_pollfd(&events, pollfd); + destroy_connection(relay_connections_ht, conn); DBG("Connection closed with %d", pollfd); } seen_control = 1; @@ -2621,6 +2612,8 @@ restart: */ last_notdel_data_fd = pollfd; } + } else { + ERR("Unknown poll events %u for sock %d", revents, pollfd); } rcu_read_unlock(); } @@ -2655,52 +2648,49 @@ restart: health_code_update(); + if (!revents) { + /* No activity for this FD (poll implementation). */ + continue; + } + /* Skip the command pipe. It's handled in the first loop. */ - if (pollfd == relay_cmd_pipe[0]) { + if (pollfd == relay_conn_pipe[0]) { continue; } - if (revents) { - rcu_read_lock(); - lttng_ht_lookup(relay_connections_ht, - (void *)((unsigned long) pollfd), - &iter); - node = lttng_ht_iter_get_node_ulong(&iter); - if (node == NULL) { - /* Skip it. Might be removed before. */ + rcu_read_lock(); + conn = connection_find_by_sock(relay_connections_ht, pollfd); + if (!conn) { + /* Skip it. Might be removed before. */ + rcu_read_unlock(); + continue; + } + + if (revents & LPOLLIN) { + if (conn->type != RELAY_DATA) { rcu_read_unlock(); continue; } - relay_connection = caa_container_of(node, - struct relay_command, sock_n); - if (revents & LPOLLIN) { - if (relay_connection->type != RELAY_DATA) { - continue; - } - - ret = relay_process_data(relay_connection, - sessions_ht); - /* connection closed */ - if (ret < 0) { - relay_cleanup_poll_connection(&events, pollfd); - relay_del_connection(relay_connections_ht, - &iter, relay_connection, sessions_ht); - DBG("Data connection closed with %d", pollfd); - /* - * Every goto restart call sets the last seen fd where - * here we don't really care since we gracefully - * continue the loop after the connection is deleted. - */ - } else { - /* Keep last seen port. */ - last_seen_data_fd = pollfd; - rcu_read_unlock(); - goto restart; - } + ret = relay_process_data(conn); + /* Connection closed */ + if (ret < 0) { + cleanup_connection_pollfd(&events, pollfd); + destroy_connection(relay_connections_ht, conn); + DBG("Data connection closed with %d", pollfd); + /* + * Every goto restart call sets the last seen fd where + * here we don't really care since we gracefully + * continue the loop after the connection is deleted. + */ + } else { + /* Keep last seen port. */ + last_seen_data_fd = pollfd; + rcu_read_unlock(); + goto restart; } - rcu_read_unlock(); } + rcu_read_unlock(); } last_seen_data_fd = -1; } @@ -2712,27 +2702,29 @@ exit: error: lttng_poll_clean(&events); - /* empty the hash table and free the memory */ + /* Cleanup reamaining connection object. */ rcu_read_lock(); - cds_lfht_for_each_entry(relay_connections_ht->ht, &iter.iter, node, node) { + cds_lfht_for_each_entry(relay_connections_ht->ht, &iter.iter, conn, + sock_n.node) { health_code_update(); - - node = lttng_ht_iter_get_node_ulong(&iter); - if (node) { - relay_connection = caa_container_of(node, - struct relay_command, sock_n); - relay_del_connection(relay_connections_ht, - &iter, relay_connection, sessions_ht); - } + destroy_connection(relay_connections_ht, conn); } rcu_read_unlock(); error_poll_create: + rcu_read_lock(); + cds_lfht_for_each_entry(indexes_ht->ht, &iter.iter, index, + index_n.node) { + health_code_update(); + relay_index_delete(index); + relay_index_free_safe(index); + } + rcu_read_unlock(); lttng_ht_destroy(indexes_ht); indexes_ht_error: lttng_ht_destroy(relay_connections_ht); relay_connections_ht_error: - /* Close relay cmd pipes */ - utils_close_pipe(relay_cmd_pipe); + /* Close relay conn pipes */ + utils_close_pipe(relay_conn_pipe); if (err) { DBG("Thread exited with error"); } @@ -2745,7 +2737,7 @@ error_testpoint: } health_unregister(health_relayd); rcu_unregister_thread(); - stop_threads(); + lttng_relay_stop_threads(); return NULL; } @@ -2753,11 +2745,11 @@ error_testpoint: * Create the relay command pipe to wake thread_manage_apps. * Closed in cleanup(). */ -static int create_relay_cmd_pipe(void) +static int create_relay_conn_pipe(void) { int ret; - ret = utils_create_pipe_cloexec(relay_cmd_pipe); + ret = utils_create_pipe_cloexec(relay_conn_pipe); return ret; } @@ -2767,31 +2759,35 @@ static int create_relay_cmd_pipe(void) */ int main(int argc, char **argv) { - int ret = 0; + int ret = 0, retval = 0; void *status; - struct relay_local_data *relay_ctx; + struct relay_local_data *relay_ctx = NULL; /* Parse arguments */ progname = argv[0]; - if ((ret = set_options(argc, argv)) < 0) { - goto exit; + if (set_options(argc, argv)) { + retval = -1; + goto exit_options; } - if ((ret = set_signal_handler()) < 0) { - goto exit; + if (set_signal_handler()) { + retval = -1; + goto exit_options; } /* Try to create directory if -o, --output is specified. */ if (opt_output_path) { if (*opt_output_path != '/') { ERR("Please specify an absolute path for -o, --output PATH"); - goto exit; + retval = -1; + goto exit_options; } ret = utils_mkdir_recursive(opt_output_path, S_IRWXU | S_IRWXG); if (ret < 0) { ERR("Unable to create %s", opt_output_path); - goto exit; + retval = -1; + goto exit_options; } } @@ -2802,7 +2798,8 @@ int main(int argc, char **argv) ret = lttng_daemonize(&child_ppid, &recv_child_signal, !opt_background); if (ret < 0) { - goto exit; + retval = -1; + goto exit_options; } /* @@ -2815,9 +2812,19 @@ int main(int argc, char **argv) } } + + /* Initialize thread health monitoring */ + health_relayd = health_app_create(NR_HEALTH_RELAYD_TYPES); + if (!health_relayd) { + PERROR("health_app_create error"); + retval = -1; + goto exit_health_app_create; + } + /* Create thread quit pipe */ - if ((ret = init_thread_quit_pipe()) < 0) { - goto error; + if (init_thread_quit_pipe()) { + retval = -1; + goto exit_init_data; } /* We need those values for the file/dir creation. */ @@ -2828,21 +2835,25 @@ int main(int argc, char **argv) if (relayd_uid == 0) { if (control_uri->port < 1024 || data_uri->port < 1024 || live_uri->port < 1024) { ERR("Need to be root to use ports < 1024"); - ret = -1; - goto exit; + retval = -1; + goto exit_init_data; } } /* Setup the thread apps communication pipe. */ - if ((ret = create_relay_cmd_pipe()) < 0) { - goto exit; + if (create_relay_conn_pipe()) { + retval = -1; + goto exit_init_data; } /* Init relay command queue. */ - cds_wfq_init(&relay_cmd_queue.queue); + cds_wfcq_init(&relay_conn_queue.head, &relay_conn_queue.tail); /* Set up max poll set size */ - lttng_poll_set_max_size(); + if (lttng_poll_set_max_size()) { + retval = -1; + goto exit_init_data; + } /* Initialize communication library */ lttcomm_init(); @@ -2851,134 +2862,139 @@ int main(int argc, char **argv) relay_ctx = zmalloc(sizeof(struct relay_local_data)); if (!relay_ctx) { PERROR("relay_ctx"); - goto exit; + retval = -1; + goto exit_init_data; } /* tables of sessions indexed by session ID */ relay_ctx->sessions_ht = lttng_ht_new(0, LTTNG_HT_TYPE_U64); if (!relay_ctx->sessions_ht) { - goto exit_relay_ctx_sessions; + retval = -1; + goto exit_init_data; } /* tables of streams indexed by stream ID */ relay_streams_ht = lttng_ht_new(0, LTTNG_HT_TYPE_U64); if (!relay_streams_ht) { - goto exit_relay_ctx_streams; + retval = -1; + goto exit_init_data; } /* tables of streams indexed by stream ID */ viewer_streams_ht = lttng_ht_new(0, LTTNG_HT_TYPE_U64); if (!viewer_streams_ht) { - goto exit_relay_ctx_viewer_streams; - } - - /* Initialize thread health monitoring */ - health_relayd = health_app_create(NR_HEALTH_RELAYD_TYPES); - if (!health_relayd) { - PERROR("health_app_create error"); - goto exit_health_app_create; + retval = -1; + goto exit_init_data; } ret = utils_create_pipe(health_quit_pipe); - if (ret < 0) { - goto error_health_pipe; + if (ret) { + retval = -1; + goto exit_health_quit_pipe; } /* Create thread to manage the client socket */ ret = pthread_create(&health_thread, NULL, thread_manage_health, (void *) NULL); - if (ret != 0) { + if (ret) { + errno = ret; PERROR("pthread_create health"); - goto health_error; + retval = -1; + goto exit_health_thread; } /* Setup the dispatcher thread */ ret = pthread_create(&dispatcher_thread, NULL, relay_thread_dispatcher, (void *) NULL); - if (ret != 0) { + if (ret) { + errno = ret; PERROR("pthread_create dispatcher"); - goto exit_dispatcher; + retval = -1; + goto exit_dispatcher_thread; } /* Setup the worker thread */ ret = pthread_create(&worker_thread, NULL, relay_thread_worker, (void *) relay_ctx); - if (ret != 0) { + if (ret) { + errno = ret; PERROR("pthread_create worker"); - goto exit_worker; + retval = -1; + goto exit_worker_thread; } /* Setup the listener thread */ ret = pthread_create(&listener_thread, NULL, relay_thread_listener, (void *) NULL); - if (ret != 0) { + if (ret) { + errno = ret; PERROR("pthread_create listener"); - goto exit_listener; + retval = -1; + goto exit_listener_thread; } - ret = live_start_threads(live_uri, relay_ctx); - if (ret != 0) { + ret = relayd_live_create(live_uri, relay_ctx); + if (ret) { ERR("Starting live viewer threads"); + retval = -1; goto exit_live; } + /* + * This is where we start awaiting program completion (e.g. through + * signal that asks threads to teardown). + */ + + ret = relayd_live_join(); + if (ret) { + retval = -1; + } exit_live: + ret = pthread_join(listener_thread, &status); - if (ret != 0) { - PERROR("pthread_join"); - goto error; /* join error, exit without cleanup */ + if (ret) { + errno = ret; + PERROR("pthread_join listener_thread"); + retval = -1; } -exit_listener: +exit_listener_thread: ret = pthread_join(worker_thread, &status); - if (ret != 0) { - PERROR("pthread_join"); - goto error; /* join error, exit without cleanup */ + if (ret) { + errno = ret; + PERROR("pthread_join worker_thread"); + retval = -1; } -exit_worker: +exit_worker_thread: ret = pthread_join(dispatcher_thread, &status); - if (ret != 0) { - PERROR("pthread_join"); - goto error; /* join error, exit without cleanup */ + if (ret) { + errno = ret; + PERROR("pthread_join dispatcher_thread"); + retval = -1; } +exit_dispatcher_thread: -exit_dispatcher: ret = pthread_join(health_thread, &status); - if (ret != 0) { - PERROR("pthread_join health thread"); - goto error; /* join error, exit without cleanup */ + if (ret) { + errno = ret; + PERROR("pthread_join health_thread"); + retval = -1; } +exit_health_thread: - /* - * Stop live threads only after joining other threads. - */ - live_stop_threads(); - -health_error: utils_close_pipe(health_quit_pipe); +exit_health_quit_pipe: -error_health_pipe: +exit_init_data: health_app_destroy(health_relayd); - exit_health_app_create: - lttng_ht_destroy(viewer_streams_ht); - -exit_relay_ctx_viewer_streams: - lttng_ht_destroy(relay_streams_ht); - -exit_relay_ctx_streams: - lttng_ht_destroy(relay_ctx->sessions_ht); - -exit_relay_ctx_sessions: - free(relay_ctx); +exit_options: + relayd_cleanup(relay_ctx); -exit: - cleanup(); - if (!ret) { + if (!retval) { exit(EXIT_SUCCESS); + } else { + exit(EXIT_FAILURE); } - -error: - exit(EXIT_FAILURE); }