X-Git-Url: https://git.lttng.org/?p=lttng-tools.git;a=blobdiff_plain;f=src%2Fbin%2Flttng-relayd%2Fmain.c;h=bd19a323715309697ba3b3ff61c6d29a615ad379;hp=1ef6881c09f8c4e63efab37f257f5869e80ba1ab;hb=092b625914723e40ccb3345826a2280cbd01f8b8;hpb=f64161251bd649abe5b6a473531adfa3af9bd6b6 diff --git a/src/bin/lttng-relayd/main.c b/src/bin/lttng-relayd/main.c index 1ef6881c0..bd19a3237 100644 --- a/src/bin/lttng-relayd/main.c +++ b/src/bin/lttng-relayd/main.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -238,8 +239,8 @@ void cleanup(void) /* Close thread quit pipes */ utils_close_pipe(thread_quit_pipe); - /* Close relay cmd pipes */ - utils_close_pipe(relay_cmd_pipe); + uri_free(control_uri); + uri_free(data_uri); } /* @@ -446,6 +447,19 @@ error: return NULL; } +/* + * Return nonzero if stream needs to be closed. + */ +static +int close_stream_check(struct relay_stream *stream) +{ + + if (stream->close_flag && stream->prev_seq == stream->last_net_seq_num) { + return 1; + } + return 0; +} + /* * This thread manages the listening for new connections on the network */ @@ -458,12 +472,6 @@ void *relay_thread_listener(void *data) struct lttng_poll_event events; struct lttcomm_sock *control_sock, *data_sock; - /* - * Get allocated in this thread, enqueued to a global queue, dequeued and - * freed in the worker thread. - */ - struct relay_command *relay_cmd = NULL; - DBG("[thread] Relay listener started"); control_sock = relay_init_sock(control_uri); @@ -530,7 +538,13 @@ restart: ERR("socket poll error"); goto error; } else if (revents & LPOLLIN) { - struct lttcomm_sock *newsock = NULL; + /* + * Get allocated in this thread, + * enqueued to a global queue, dequeued + * and freed in the worker thread. + */ + struct relay_command *relay_cmd; + struct lttcomm_sock *newsock; relay_cmd = zmalloc(sizeof(struct relay_command)); if (relay_cmd == NULL) { @@ -540,16 +554,19 @@ restart: if (pollfd == data_sock->fd) { newsock = data_sock->ops->accept(data_sock); - if (newsock < 0) { + if (!newsock) { PERROR("accepting data sock"); + free(relay_cmd); goto error; } relay_cmd->type = RELAY_DATA; DBG("Relay data connection accepted, socket %d", newsock->fd); - } else if (pollfd == control_sock->fd) { + } else { + assert(pollfd == control_sock->fd); newsock = control_sock->ops->accept(control_sock); - if (newsock < 0) { + if (!newsock) { PERROR("accepting control sock"); + free(relay_cmd); goto error; } relay_cmd->type = RELAY_CONTROL; @@ -559,6 +576,8 @@ restart: &val, sizeof(int)); if (ret < 0) { PERROR("setsockopt inet"); + lttcomm_destroy_sock(newsock); + free(relay_cmd); goto error; } relay_cmd->sock = newsock; @@ -864,8 +883,9 @@ void relay_delete_session(struct relay_command *cmd, struct lttng_ht *streams_ht return; } - DBG("Relay deleting session %lu", cmd->session->id); - free(cmd->session->sock); + DBG("Relay deleting session %" PRIu64, cmd->session->id); + + lttcomm_destroy_sock(cmd->session->sock); rcu_read_lock(); cds_lfht_for_each_entry(streams_ht->ht, &iter.iter, node, node) { @@ -874,7 +894,10 @@ void relay_delete_session(struct relay_command *cmd, struct lttng_ht *streams_ht stream = caa_container_of(node, struct relay_stream, stream_n); if (stream->session == cmd->session) { - close(stream->fd); + ret = close(stream->fd); + if (ret < 0) { + PERROR("close stream fd on delete session"); + } ret = lttng_ht_del(streams_ht, &iter); assert(!ret); call_rcu(&stream->rcu_node, @@ -883,6 +906,8 @@ void relay_delete_session(struct relay_command *cmd, struct lttng_ht *streams_ht } } rcu_read_unlock(); + + free(cmd->session); } /* @@ -922,7 +947,7 @@ int relay_add_stream(struct lttcomm_relayd_hdr *recv_hdr, rcu_read_lock(); stream->stream_handle = ++last_relay_stream_id; - stream->seq = 0; + stream->prev_seq = -1ULL; stream->session = session; root_path = create_output_path(stream_info.pathname); @@ -963,9 +988,9 @@ end: free(root_path); /* send the session id to the client or a negative return code on error */ if (ret < 0) { - reply.ret_code = htobe32(LTTCOMM_ERR); + reply.ret_code = htobe32(LTTNG_ERR_UNK); } else { - reply.ret_code = htobe32(LTTCOMM_OK); + reply.ret_code = htobe32(LTTNG_OK); } reply.handle = htobe64(stream->stream_handle); send_ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, @@ -979,6 +1004,89 @@ end_no_session: return ret; } +/* + * relay_close_stream: close a specific stream + */ +static +int relay_close_stream(struct lttcomm_relayd_hdr *recv_hdr, + struct relay_command *cmd, struct lttng_ht *streams_ht) +{ + struct relay_session *session = cmd->session; + struct lttcomm_relayd_close_stream stream_info; + struct lttcomm_relayd_generic_reply reply; + struct relay_stream *stream; + int ret, send_ret; + struct lttng_ht_node_ulong *node; + struct lttng_ht_iter iter; + + DBG("Close stream received"); + + if (!session || session->version_check_done == 0) { + ERR("Trying to close a stream before version check"); + ret = -1; + goto end_no_session; + } + + ret = cmd->sock->ops->recvmsg(cmd->sock, &stream_info, + sizeof(struct lttcomm_relayd_close_stream), MSG_WAITALL); + if (ret < sizeof(struct lttcomm_relayd_close_stream)) { + ERR("Relay didn't receive valid add_stream struct size : %d", ret); + ret = -1; + goto end_no_session; + } + + rcu_read_lock(); + lttng_ht_lookup(streams_ht, + (void *)((unsigned long) be64toh(stream_info.stream_id)), + &iter); + node = lttng_ht_iter_get_node_ulong(&iter); + if (node == NULL) { + DBG("Relay stream %" PRIu64 " not found", be64toh(stream_info.stream_id)); + ret = -1; + goto end_unlock; + } + + stream = caa_container_of(node, struct relay_stream, stream_n); + if (!stream) { + ret = -1; + goto end_unlock; + } + + stream->last_net_seq_num = be64toh(stream_info.last_net_seq_num); + stream->close_flag = 1; + + if (close_stream_check(stream)) { + int delret; + + delret = close(stream->fd); + if (delret < 0) { + PERROR("close stream"); + } + delret = lttng_ht_del(streams_ht, &iter); + assert(!delret); + call_rcu(&stream->rcu_node, + deferred_free_stream); + DBG("Closed tracefile %d from close stream", stream->fd); + } + +end_unlock: + rcu_read_unlock(); + + if (ret < 0) { + reply.ret_code = htobe32(LTTNG_ERR_UNK); + } else { + reply.ret_code = htobe32(LTTNG_OK); + } + send_ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, + sizeof(struct lttcomm_relayd_generic_reply), 0); + if (send_ret < 0) { + ERR("Relay sending stream id"); + } + +end_no_session: + return ret; +} + /* * relay_unknown_command: send -1 if received unknown command */ @@ -988,7 +1096,7 @@ void relay_unknown_command(struct relay_command *cmd) struct lttcomm_relayd_generic_reply reply; int ret; - reply.ret_code = htobe32(LTTCOMM_ERR); + reply.ret_code = htobe32(LTTNG_ERR_UNK); ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(struct lttcomm_relayd_generic_reply), 0); if (ret < 0) { @@ -1004,13 +1112,13 @@ static int relay_start(struct lttcomm_relayd_hdr *recv_hdr, struct relay_command *cmd) { - int ret = htobe32(LTTCOMM_OK); + int ret = htobe32(LTTNG_OK); struct lttcomm_relayd_generic_reply reply; struct relay_session *session = cmd->session; if (!session) { DBG("Trying to start the streaming without a session established"); - ret = htobe32(LTTCOMM_ERR); + ret = htobe32(LTTNG_ERR_UNK); } reply.ret_code = ret; @@ -1040,7 +1148,7 @@ struct relay_stream *relay_stream_from_stream_id(uint64_t stream_id, &iter); node = lttng_ht_iter_get_node_ulong(&iter); if (node == NULL) { - DBG("Relay stream %lu not found", stream_id); + DBG("Relay stream %" PRIu64 " not found", stream_id); ret = NULL; goto end; } @@ -1051,6 +1159,38 @@ end: return ret; } +/* + * Append padding to the file pointed by the file descriptor fd. + */ +static int write_padding_to_file(int fd, uint32_t size) +{ + int ret = 0; + char *zeros; + + if (size == 0) { + goto end; + } + + zeros = zmalloc(size); + if (zeros == NULL) { + PERROR("zmalloc zeros for padding"); + ret = -1; + goto end; + } + + do { + ret = write(fd, zeros, size); + } while (ret < 0 && errno == EINTR); + if (ret < 0) { + PERROR("write padding to file"); + } + + free(zeros); + +end: + return ret; +} + /* * relay_recv_metadata: receive the metada for the session. */ @@ -1058,7 +1198,7 @@ static int relay_recv_metadata(struct lttcomm_relayd_hdr *recv_hdr, struct relay_command *cmd, struct lttng_ht *streams_ht) { - int ret = htobe32(LTTCOMM_OK); + int ret = htobe32(LTTNG_OK); struct relay_session *session = cmd->session; struct lttcomm_relayd_metadata_payload *metadata_struct; struct relay_stream *metadata_stream; @@ -1079,16 +1219,19 @@ int relay_recv_metadata(struct lttcomm_relayd_hdr *recv_hdr, payload_size -= sizeof(struct lttcomm_relayd_metadata_payload); if (data_buffer_size < data_size) { + /* In case the realloc fails, we can free the memory */ + char *tmp_data_ptr = data_buffer; data_buffer = realloc(data_buffer, data_size); if (!data_buffer) { ERR("Allocating data buffer"); + free(tmp_data_ptr); ret = -1; goto end; } data_buffer_size = data_size; } memset(data_buffer, 0, data_size); - DBG2("Relay receiving metadata, waiting for %lu bytes", data_size); + DBG2("Relay receiving metadata, waiting for %" PRIu64 " bytes", data_size); ret = cmd->sock->ops->recvmsg(cmd->sock, data_buffer, data_size, MSG_WAITALL); if (ret < 0 || ret != data_size) { @@ -1115,6 +1258,13 @@ int relay_recv_metadata(struct lttcomm_relayd_hdr *recv_hdr, ret = -1; goto end_unlock; } + + ret = write_padding_to_file(metadata_stream->fd, + be32toh(metadata_struct->padding_size)); + if (ret < 0) { + goto end_unlock; + } + DBG2("Relay metadata written"); end_unlock: @@ -1130,9 +1280,9 @@ static int relay_send_version(struct lttcomm_relayd_hdr *recv_hdr, struct relay_command *cmd) { - int ret = htobe32(LTTCOMM_OK); - struct lttcomm_relayd_version reply; - struct relay_session *session = NULL; + int ret; + struct lttcomm_relayd_version reply, msg; + struct relay_session *session; if (cmd->session == NULL) { session = zmalloc(sizeof(struct relay_session)); @@ -1142,12 +1292,34 @@ int relay_send_version(struct lttcomm_relayd_hdr *recv_hdr, goto end; } session->id = ++last_relay_session_id; - DBG("Created session %lu", session->id); + DBG("Created session %" PRIu64, session->id); cmd->session = session; + } else { + session = cmd->session; } session->version_check_done = 1; - sscanf(VERSION, "%u.%u", &reply.major, &reply.minor); + /* Get version from the other side. */ + ret = cmd->sock->ops->recvmsg(cmd->sock, &msg, sizeof(msg), MSG_WAITALL); + if (ret < 0 || ret != sizeof(msg)) { + ret = -1; + ERR("Relay failed to receive the version values."); + goto end; + } + + /* + * For now, we just ignore the received version but after 2.1 stable + * release, a check must be done to see if we either adapt to the other + * side version (which MUST be lower than us) or keep the latest data + * structure considering that the other side will adapt. + */ + + ret = sscanf(VERSION, "%u.%u", &reply.major, &reply.minor); + if (ret < 2) { + ERR("Error in scanning version"); + ret = -1; + goto end; + } reply.major = htobe32(reply.major); reply.minor = htobe32(reply.minor); ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, @@ -1155,12 +1327,110 @@ int relay_send_version(struct lttcomm_relayd_hdr *recv_hdr, if (ret < 0) { ERR("Relay sending version"); } - DBG("Version check done"); + DBG("Version check done (%u.%u)", be32toh(reply.major), + be32toh(reply.minor)); end: return ret; } +/* + * Check for data pending for a given stream id from the session daemon. + */ +static +int relay_data_pending(struct lttcomm_relayd_hdr *recv_hdr, + struct relay_command *cmd, struct lttng_ht *streams_ht) +{ + struct relay_session *session = cmd->session; + struct lttcomm_relayd_data_pending msg; + struct lttcomm_relayd_generic_reply reply; + struct relay_stream *stream; + int ret; + struct lttng_ht_node_ulong *node; + struct lttng_ht_iter iter; + uint64_t last_net_seq_num, stream_id; + + DBG("Data pending command received"); + + if (!session || session->version_check_done == 0) { + ERR("Trying to check for data before version check"); + ret = -1; + goto end_no_session; + } + + ret = cmd->sock->ops->recvmsg(cmd->sock, &msg, sizeof(msg), MSG_WAITALL); + if (ret < sizeof(msg)) { + ERR("Relay didn't receive valid data_pending struct size : %d", ret); + ret = -1; + goto end_no_session; + } + + stream_id = be64toh(msg.stream_id); + last_net_seq_num = be64toh(msg.last_net_seq_num); + + rcu_read_lock(); + lttng_ht_lookup(streams_ht, (void *)((unsigned long) stream_id), &iter); + node = lttng_ht_iter_get_node_ulong(&iter); + if (node == NULL) { + DBG("Relay stream %" PRIu64 " not found", stream_id); + ret = -1; + goto end_unlock; + } + + stream = caa_container_of(node, struct relay_stream, stream_n); + assert(stream); + + DBG("Data pending for stream id %" PRIu64 " prev_seq %" PRIu64 + " and last_seq %" PRIu64, stream_id, stream->prev_seq, + last_net_seq_num); + + /* Avoid wrapping issue */ + if (((int64_t) (stream->prev_seq - last_net_seq_num)) <= 0) { + /* Data has in fact been written and is NOT pending */ + ret = 0; + } else { + /* Data still being streamed thus pending */ + ret = 1; + } + +end_unlock: + rcu_read_unlock(); + + reply.ret_code = htobe32(ret); + ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0); + if (ret < 0) { + ERR("Relay data pending ret code failed"); + } + +end_no_session: + return ret; +} + +/* + * Wait for the control socket to reach a quiescent state. + * + * Note that for now, when receiving this command from the session daemon, this + * means that every subsequent commands or data received on the control socket + * has been handled. So, this is why we simply return OK here. + */ +static +int relay_quiescent_control(struct lttcomm_relayd_hdr *recv_hdr, + struct relay_command *cmd) +{ + int ret; + struct lttcomm_relayd_generic_reply reply; + + DBG("Checking quiescent state on control socket"); + + reply.ret_code = htobe32(LTTNG_OK); + ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0); + if (ret < 0) { + ERR("Relay data quiescent control ret code failed"); + } + + return ret; +} + /* * relay_process_control: Process the commands received on the control socket */ @@ -1188,6 +1458,15 @@ int relay_process_control(struct lttcomm_relayd_hdr *recv_hdr, case RELAYD_VERSION: ret = relay_send_version(recv_hdr, cmd); break; + case RELAYD_CLOSE_STREAM: + ret = relay_close_stream(recv_hdr, cmd, streams_ht); + break; + case RELAYD_DATA_PENDING: + ret = relay_data_pending(recv_hdr, cmd, streams_ht); + break; + case RELAYD_QUIESCENT_CONTROL: + ret = relay_quiescent_control(recv_hdr, cmd); + break; case RELAYD_UPDATE_SYNC_INFO: default: ERR("Received unknown command (%u)", be32toh(recv_hdr->cmd)); @@ -1210,6 +1489,7 @@ int relay_process_data(struct relay_command *cmd, struct lttng_ht *streams_ht) struct relay_stream *stream; struct lttcomm_relayd_data_hdr data_hdr; uint64_t stream_id; + uint64_t net_seq_num; uint32_t data_size; ret = cmd->sock->ops->recvmsg(cmd->sock, &data_hdr, @@ -1231,9 +1511,11 @@ int relay_process_data(struct relay_command *cmd, struct lttng_ht *streams_ht) data_size = be32toh(data_hdr.data_size); if (data_buffer_size < data_size) { + char *tmp_data_ptr = data_buffer; data_buffer = realloc(data_buffer, data_size); if (!data_buffer) { ERR("Allocating data buffer"); + free(tmp_data_ptr); ret = -1; goto end_unlock; } @@ -1241,7 +1523,10 @@ int relay_process_data(struct relay_command *cmd, struct lttng_ht *streams_ht) } memset(data_buffer, 0, data_size); - DBG3("Receiving data of size %u for stream id %zu", data_size, stream_id); + net_seq_num = be64toh(data_hdr.net_seq_num); + + DBG3("Receiving data of size %u for stream id %" PRIu64 " seqnum %" PRIu64, + data_size, stream_id, net_seq_num); ret = cmd->sock->ops->recvmsg(cmd->sock, data_buffer, data_size, MSG_WAITALL); if (ret <= 0) { ret = -1; @@ -1256,7 +1541,33 @@ int relay_process_data(struct relay_command *cmd, struct lttng_ht *streams_ht) ret = -1; goto end_unlock; } - DBG2("Relay wrote %d bytes to tracefile for stream id %lu", ret, stream->stream_handle); + + DBG2("Relay wrote %d bytes to tracefile for stream id %" PRIu64, + ret, stream->stream_handle); + + ret = write_padding_to_file(stream->fd, be32toh(data_hdr.padding_size)); + if (ret < 0) { + goto end_unlock; + } + + stream->prev_seq = net_seq_num; + + /* Check if we need to close the FD */ + if (close_stream_check(stream)) { + int cret; + struct lttng_ht_iter iter; + + cret = close(stream->fd); + if (cret < 0) { + PERROR("close stream process data"); + } + iter.iter.node = &stream->stream_n.node; + ret = lttng_ht_del(streams_ht, &iter); + assert(!ret); + call_rcu(&stream->rcu_node, + deferred_free_stream); + DBG("Closed tracefile %d after recv data", stream->fd); + } end_unlock: rcu_read_unlock(); @@ -1290,7 +1601,7 @@ int relay_add_connection(int fd, struct lttng_poll_event *events, goto error; } ret = read(fd, relay_connection, sizeof(struct relay_command)); - if (ret < 0 || ret < sizeof(relay_connection)) { + if (ret < 0 || ret < sizeof(struct relay_command)) { PERROR("read relay cmd pipe"); goto error_read; } @@ -1316,6 +1627,8 @@ void deferred_free_connection(struct rcu_head *head) { struct relay_command *relay_connection = caa_container_of(head, struct relay_command, rcu_node); + + lttcomm_destroy_sock(relay_connection->sock); free(relay_connection); } @@ -1331,6 +1644,7 @@ void relay_del_connection(struct lttng_ht *relay_connections_ht, if (relay_connection->type == RELAY_CONTROL) { relay_delete_session(relay_connection, streams_ht); } + call_rcu(&relay_connection->rcu_node, deferred_free_connection); } @@ -1463,7 +1777,7 @@ void *relay_thread_worker(void *data) DBG("Control connection closed with %d", pollfd); } else { if (relay_connection->session) { - DBG2("Relay worker receiving data for session : %lu", + DBG2("Relay worker receiving data for session : %" PRIu64, relay_connection->session->id); } ret = relay_process_control(&recv_hdr, @@ -1521,6 +1835,8 @@ error_poll_create: streams_ht_error: lttng_ht_destroy(relay_connections_ht); relay_connections_ht_error: + /* Close relay cmd pipes */ + utils_close_pipe(relay_cmd_pipe); if (err) { DBG("Thread exited with error"); }