X-Git-Url: https://git.lttng.org/?p=lttng-tools.git;a=blobdiff_plain;f=src%2Fbin%2Flttng-relayd%2Fmain.c;h=2a7152b892192f7e683bf881b2b106af0a2da36c;hp=fdddcbe6efcf54309078b3ad67917b0f46b59bbf;hb=f50f23d9f80ed9fae7fe5c49aee65e813e0031c8;hpb=1d4dfdefb6efe31cd6436931f893caf7b05d0223 diff --git a/src/bin/lttng-relayd/main.c b/src/bin/lttng-relayd/main.c index fdddcbe6e..2a7152b89 100644 --- a/src/bin/lttng-relayd/main.c +++ b/src/bin/lttng-relayd/main.c @@ -239,8 +239,8 @@ void cleanup(void) /* Close thread quit pipes */ utils_close_pipe(thread_quit_pipe); - /* Close relay cmd pipes */ - utils_close_pipe(relay_cmd_pipe); + uri_free(control_uri); + uri_free(data_uri); } /* @@ -472,12 +472,6 @@ void *relay_thread_listener(void *data) struct lttng_poll_event events; struct lttcomm_sock *control_sock, *data_sock; - /* - * Get allocated in this thread, enqueued to a global queue, dequeued and - * freed in the worker thread. - */ - struct relay_command *relay_cmd = NULL; - DBG("[thread] Relay listener started"); control_sock = relay_init_sock(control_uri); @@ -544,7 +538,13 @@ restart: ERR("socket poll error"); goto error; } else if (revents & LPOLLIN) { - struct lttcomm_sock *newsock = NULL; + /* + * Get allocated in this thread, + * enqueued to a global queue, dequeued + * and freed in the worker thread. + */ + struct relay_command *relay_cmd; + struct lttcomm_sock *newsock; relay_cmd = zmalloc(sizeof(struct relay_command)); if (relay_cmd == NULL) { @@ -554,16 +554,19 @@ restart: if (pollfd == data_sock->fd) { newsock = data_sock->ops->accept(data_sock); - if (newsock < 0) { + if (!newsock) { PERROR("accepting data sock"); + free(relay_cmd); goto error; } relay_cmd->type = RELAY_DATA; DBG("Relay data connection accepted, socket %d", newsock->fd); - } else if (pollfd == control_sock->fd) { + } else { + assert(pollfd == control_sock->fd); newsock = control_sock->ops->accept(control_sock); - if (newsock < 0) { + if (!newsock) { PERROR("accepting control sock"); + free(relay_cmd); goto error; } relay_cmd->type = RELAY_CONTROL; @@ -573,6 +576,8 @@ restart: &val, sizeof(int)); if (ret < 0) { PERROR("setsockopt inet"); + lttcomm_destroy_sock(newsock); + free(relay_cmd); goto error; } relay_cmd->sock = newsock; @@ -879,7 +884,8 @@ void relay_delete_session(struct relay_command *cmd, struct lttng_ht *streams_ht } DBG("Relay deleting session %" PRIu64, cmd->session->id); - free(cmd->session->sock); + + lttcomm_destroy_sock(cmd->session->sock); rcu_read_lock(); cds_lfht_for_each_entry(streams_ht->ht, &iter.iter, node, node) { @@ -888,7 +894,10 @@ void relay_delete_session(struct relay_command *cmd, struct lttng_ht *streams_ht stream = caa_container_of(node, struct relay_stream, stream_n); if (stream->session == cmd->session) { - close(stream->fd); + ret = close(stream->fd); + if (ret < 0) { + PERROR("close stream fd on delete session"); + } ret = lttng_ht_del(streams_ht, &iter); assert(!ret); call_rcu(&stream->rcu_node, @@ -897,6 +906,8 @@ void relay_delete_session(struct relay_command *cmd, struct lttng_ht *streams_ht } } rcu_read_unlock(); + + free(cmd->session); } /* @@ -921,7 +932,7 @@ int relay_add_stream(struct lttcomm_relayd_hdr *recv_hdr, /* FIXME : use data_size for something ? */ ret = cmd->sock->ops->recvmsg(cmd->sock, &stream_info, - sizeof(struct lttcomm_relayd_add_stream), MSG_WAITALL); + sizeof(struct lttcomm_relayd_add_stream), 0); if (ret < sizeof(struct lttcomm_relayd_add_stream)) { ERR("Relay didn't receive valid add_stream struct size : %d", ret); ret = -1; @@ -1017,7 +1028,7 @@ int relay_close_stream(struct lttcomm_relayd_hdr *recv_hdr, } ret = cmd->sock->ops->recvmsg(cmd->sock, &stream_info, - sizeof(struct lttcomm_relayd_close_stream), MSG_WAITALL); + sizeof(struct lttcomm_relayd_close_stream), 0); if (ret < sizeof(struct lttcomm_relayd_close_stream)) { ERR("Relay didn't receive valid add_stream struct size : %d", ret); ret = -1; @@ -1041,12 +1052,16 @@ int relay_close_stream(struct lttcomm_relayd_hdr *recv_hdr, goto end_unlock; } + stream->last_net_seq_num = be64toh(stream_info.last_net_seq_num); stream->close_flag = 1; if (close_stream_check(stream)) { int delret; - close(stream->fd); + delret = close(stream->fd); + if (delret < 0) { + PERROR("close stream"); + } delret = lttng_ht_del(streams_ht, &iter); assert(!delret); call_rcu(&stream->rcu_node, @@ -1170,6 +1185,8 @@ static int write_padding_to_file(int fd, uint32_t size) PERROR("write padding to file"); } + free(zeros); + end: return ret; } @@ -1202,9 +1219,12 @@ int relay_recv_metadata(struct lttcomm_relayd_hdr *recv_hdr, payload_size -= sizeof(struct lttcomm_relayd_metadata_payload); if (data_buffer_size < data_size) { + /* In case the realloc fails, we can free the memory */ + char *tmp_data_ptr = data_buffer; data_buffer = realloc(data_buffer, data_size); if (!data_buffer) { ERR("Allocating data buffer"); + free(tmp_data_ptr); ret = -1; goto end; } @@ -1212,8 +1232,7 @@ int relay_recv_metadata(struct lttcomm_relayd_hdr *recv_hdr, } memset(data_buffer, 0, data_size); DBG2("Relay receiving metadata, waiting for %" PRIu64 " bytes", data_size); - ret = cmd->sock->ops->recvmsg(cmd->sock, data_buffer, data_size, - MSG_WAITALL); + ret = cmd->sock->ops->recvmsg(cmd->sock, data_buffer, data_size, 0); if (ret < 0 || ret != data_size) { ret = -1; ERR("Relay didn't receive the whole metadata"); @@ -1260,9 +1279,9 @@ static int relay_send_version(struct lttcomm_relayd_hdr *recv_hdr, struct relay_command *cmd) { - int ret = htobe32(LTTNG_OK); - struct lttcomm_relayd_version reply; - struct relay_session *session = NULL; + int ret; + struct lttcomm_relayd_version reply, msg; + struct relay_session *session; if (cmd->session == NULL) { session = zmalloc(sizeof(struct relay_session)); @@ -1274,10 +1293,32 @@ int relay_send_version(struct lttcomm_relayd_hdr *recv_hdr, session->id = ++last_relay_session_id; DBG("Created session %" PRIu64, session->id); cmd->session = session; + } else { + session = cmd->session; } session->version_check_done = 1; - sscanf(VERSION, "%u.%u", &reply.major, &reply.minor); + /* Get version from the other side. */ + ret = cmd->sock->ops->recvmsg(cmd->sock, &msg, sizeof(msg), 0); + if (ret < 0 || ret != sizeof(msg)) { + ret = -1; + ERR("Relay failed to receive the version values."); + goto end; + } + + /* + * For now, we just ignore the received version but after 2.1 stable + * release, a check must be done to see if we either adapt to the other + * side version (which MUST be lower than us) or keep the latest data + * structure considering that the other side will adapt. + */ + + ret = sscanf(VERSION, "%u.%u", &reply.major, &reply.minor); + if (ret < 2) { + ERR("Error in scanning version"); + ret = -1; + goto end; + } reply.major = htobe32(reply.major); reply.minor = htobe32(reply.minor); ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, @@ -1292,6 +1333,103 @@ end: return ret; } +/* + * Check for data pending for a given stream id from the session daemon. + */ +static +int relay_data_pending(struct lttcomm_relayd_hdr *recv_hdr, + struct relay_command *cmd, struct lttng_ht *streams_ht) +{ + struct relay_session *session = cmd->session; + struct lttcomm_relayd_data_pending msg; + struct lttcomm_relayd_generic_reply reply; + struct relay_stream *stream; + int ret; + struct lttng_ht_node_ulong *node; + struct lttng_ht_iter iter; + uint64_t last_net_seq_num, stream_id; + + DBG("Data pending command received"); + + if (!session || session->version_check_done == 0) { + ERR("Trying to check for data before version check"); + ret = -1; + goto end_no_session; + } + + ret = cmd->sock->ops->recvmsg(cmd->sock, &msg, sizeof(msg), 0); + if (ret < sizeof(msg)) { + ERR("Relay didn't receive valid data_pending struct size : %d", ret); + ret = -1; + goto end_no_session; + } + + stream_id = be64toh(msg.stream_id); + last_net_seq_num = be64toh(msg.last_net_seq_num); + + rcu_read_lock(); + lttng_ht_lookup(streams_ht, (void *)((unsigned long) stream_id), &iter); + node = lttng_ht_iter_get_node_ulong(&iter); + if (node == NULL) { + DBG("Relay stream %" PRIu64 " not found", stream_id); + ret = -1; + goto end_unlock; + } + + stream = caa_container_of(node, struct relay_stream, stream_n); + assert(stream); + + DBG("Data pending for stream id %" PRIu64 " prev_seq %" PRIu64 + " and last_seq %" PRIu64, stream_id, stream->prev_seq, + last_net_seq_num); + + /* Avoid wrapping issue */ + if (((int64_t) (stream->prev_seq - last_net_seq_num)) <= 0) { + /* Data has in fact been written and is NOT pending */ + ret = 0; + } else { + /* Data still being streamed thus pending */ + ret = 1; + } + +end_unlock: + rcu_read_unlock(); + + reply.ret_code = htobe32(ret); + ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0); + if (ret < 0) { + ERR("Relay data pending ret code failed"); + } + +end_no_session: + return ret; +} + +/* + * Wait for the control socket to reach a quiescent state. + * + * Note that for now, when receiving this command from the session daemon, this + * means that every subsequent commands or data received on the control socket + * has been handled. So, this is why we simply return OK here. + */ +static +int relay_quiescent_control(struct lttcomm_relayd_hdr *recv_hdr, + struct relay_command *cmd) +{ + int ret; + struct lttcomm_relayd_generic_reply reply; + + DBG("Checking quiescent state on control socket"); + + reply.ret_code = htobe32(LTTNG_OK); + ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0); + if (ret < 0) { + ERR("Relay data quiescent control ret code failed"); + } + + return ret; +} + /* * relay_process_control: Process the commands received on the control socket */ @@ -1322,6 +1460,12 @@ int relay_process_control(struct lttcomm_relayd_hdr *recv_hdr, case RELAYD_CLOSE_STREAM: ret = relay_close_stream(recv_hdr, cmd, streams_ht); break; + case RELAYD_DATA_PENDING: + ret = relay_data_pending(recv_hdr, cmd, streams_ht); + break; + case RELAYD_QUIESCENT_CONTROL: + ret = relay_quiescent_control(recv_hdr, cmd); + break; case RELAYD_UPDATE_SYNC_INFO: default: ERR("Received unknown command (%u)", be32toh(recv_hdr->cmd)); @@ -1348,7 +1492,7 @@ int relay_process_data(struct relay_command *cmd, struct lttng_ht *streams_ht) uint32_t data_size; ret = cmd->sock->ops->recvmsg(cmd->sock, &data_hdr, - sizeof(struct lttcomm_relayd_data_hdr), MSG_WAITALL); + sizeof(struct lttcomm_relayd_data_hdr), 0); if (ret <= 0) { ERR("Connections seems to be closed"); ret = -1; @@ -1366,9 +1510,11 @@ int relay_process_data(struct relay_command *cmd, struct lttng_ht *streams_ht) data_size = be32toh(data_hdr.data_size); if (data_buffer_size < data_size) { + char *tmp_data_ptr = data_buffer; data_buffer = realloc(data_buffer, data_size); if (!data_buffer) { ERR("Allocating data buffer"); + free(tmp_data_ptr); ret = -1; goto end_unlock; } @@ -1380,7 +1526,7 @@ int relay_process_data(struct relay_command *cmd, struct lttng_ht *streams_ht) DBG3("Receiving data of size %u for stream id %" PRIu64 " seqnum %" PRIu64, data_size, stream_id, net_seq_num); - ret = cmd->sock->ops->recvmsg(cmd->sock, data_buffer, data_size, MSG_WAITALL); + ret = cmd->sock->ops->recvmsg(cmd->sock, data_buffer, data_size, 0); if (ret <= 0) { ret = -1; goto end_unlock; @@ -1395,21 +1541,25 @@ int relay_process_data(struct relay_command *cmd, struct lttng_ht *streams_ht) goto end_unlock; } + DBG2("Relay wrote %d bytes to tracefile for stream id %" PRIu64, + ret, stream->stream_handle); + ret = write_padding_to_file(stream->fd, be32toh(data_hdr.padding_size)); if (ret < 0) { goto end_unlock; } - DBG2("Relay wrote %d bytes to tracefile for stream id %" PRIu64, - ret, stream->stream_handle); - stream->prev_seq = net_seq_num; /* Check if we need to close the FD */ if (close_stream_check(stream)) { + int cret; struct lttng_ht_iter iter; - close(stream->fd); + cret = close(stream->fd); + if (cret < 0) { + PERROR("close stream process data"); + } iter.iter.node = &stream->stream_n.node; ret = lttng_ht_del(streams_ht, &iter); assert(!ret); @@ -1476,6 +1626,8 @@ void deferred_free_connection(struct rcu_head *head) { struct relay_command *relay_connection = caa_container_of(head, struct relay_command, rcu_node); + + lttcomm_destroy_sock(relay_connection->sock); free(relay_connection); } @@ -1491,6 +1643,7 @@ void relay_del_connection(struct lttng_ht *relay_connections_ht, if (relay_connection->type == RELAY_CONTROL) { relay_delete_session(relay_connection, streams_ht); } + call_rcu(&relay_connection->rcu_node, deferred_free_connection); } @@ -1613,7 +1766,7 @@ void *relay_thread_worker(void *data) if (relay_connection->type == RELAY_CONTROL) { ret = relay_connection->sock->ops->recvmsg( relay_connection->sock, &recv_hdr, - sizeof(struct lttcomm_relayd_hdr), MSG_WAITALL); + sizeof(struct lttcomm_relayd_hdr), 0); /* connection closed */ if (ret <= 0) { relay_cleanup_poll_connection(&events, pollfd); @@ -1681,6 +1834,8 @@ error_poll_create: streams_ht_error: lttng_ht_destroy(relay_connections_ht); relay_connections_ht_error: + /* Close relay cmd pipes */ + utils_close_pipe(relay_cmd_pipe); if (err) { DBG("Thread exited with error"); }