X-Git-Url: https://git.lttng.org/?p=lttng-tools.git;a=blobdiff_plain;f=src%2Fbin%2Flttng-relayd%2Fmain.c;h=0f81d556dd4df73a910471756b75cf6567581a36;hp=abefba8659d7cc429aa39588186e580de084d51b;hb=c8f59ee5fc11492ef472dc5cfd2fd2c4926b1787;hpb=6e3c5836f180eeee21271242f707f4b88a840570 diff --git a/src/bin/lttng-relayd/main.c b/src/bin/lttng-relayd/main.c index abefba865..0f81d556d 100644 --- a/src/bin/lttng-relayd/main.c +++ b/src/bin/lttng-relayd/main.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -446,6 +447,19 @@ error: return NULL; } +/* + * Return nonzero if stream needs to be closed. + */ +static +int close_stream_check(struct relay_stream *stream) +{ + + if (stream->close_flag && stream->prev_seq == stream->last_net_seq_num) { + return 1; + } + return 0; +} + /* * This thread manages the listening for new connections on the network */ @@ -458,12 +472,6 @@ void *relay_thread_listener(void *data) struct lttng_poll_event events; struct lttcomm_sock *control_sock, *data_sock; - /* - * Get allocated in this thread, enqueued to a global queue, dequeued and - * freed in the worker thread. - */ - struct relay_command *relay_cmd = NULL; - DBG("[thread] Relay listener started"); control_sock = relay_init_sock(control_uri); @@ -530,7 +538,13 @@ restart: ERR("socket poll error"); goto error; } else if (revents & LPOLLIN) { - struct lttcomm_sock *newsock = NULL; + /* + * Get allocated in this thread, + * enqueued to a global queue, dequeued + * and freed in the worker thread. + */ + struct relay_command *relay_cmd; + struct lttcomm_sock *newsock; relay_cmd = zmalloc(sizeof(struct relay_command)); if (relay_cmd == NULL) { @@ -540,16 +554,19 @@ restart: if (pollfd == data_sock->fd) { newsock = data_sock->ops->accept(data_sock); - if (newsock < 0) { + if (!newsock) { PERROR("accepting data sock"); + free(relay_cmd); goto error; } relay_cmd->type = RELAY_DATA; DBG("Relay data connection accepted, socket %d", newsock->fd); - } else if (pollfd == control_sock->fd) { + } else { + assert(pollfd == control_sock->fd); newsock = control_sock->ops->accept(control_sock); - if (newsock < 0) { + if (!newsock) { PERROR("accepting control sock"); + free(relay_cmd); goto error; } relay_cmd->type = RELAY_CONTROL; @@ -559,6 +576,8 @@ restart: &val, sizeof(int)); if (ret < 0) { PERROR("setsockopt inet"); + lttcomm_destroy_sock(newsock); + free(relay_cmd); goto error; } relay_cmd->sock = newsock; @@ -864,7 +883,7 @@ void relay_delete_session(struct relay_command *cmd, struct lttng_ht *streams_ht return; } - DBG("Relay deleting session %lu", cmd->session->id); + DBG("Relay deleting session %" PRIu64, cmd->session->id); free(cmd->session->sock); rcu_read_lock(); @@ -922,7 +941,7 @@ int relay_add_stream(struct lttcomm_relayd_hdr *recv_hdr, rcu_read_lock(); stream->stream_handle = ++last_relay_stream_id; - stream->seq = 0; + stream->prev_seq = -1ULL; stream->session = session; root_path = create_output_path(stream_info.pathname); @@ -963,9 +982,9 @@ end: free(root_path); /* send the session id to the client or a negative return code on error */ if (ret < 0) { - reply.ret_code = htobe32(LTTCOMM_ERR); + reply.ret_code = htobe32(LTTNG_ERR_UNK); } else { - reply.ret_code = htobe32(LTTCOMM_OK); + reply.ret_code = htobe32(LTTNG_OK); } reply.handle = htobe64(stream->stream_handle); send_ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, @@ -979,6 +998,86 @@ end_no_session: return ret; } +/* + * relay_close_stream: close a specific stream + */ +static +int relay_close_stream(struct lttcomm_relayd_hdr *recv_hdr, + struct relay_command *cmd, struct lttng_ht *streams_ht) +{ + struct relay_session *session = cmd->session; + struct lttcomm_relayd_close_stream stream_info; + struct lttcomm_relayd_generic_reply reply; + struct relay_stream *stream; + int ret, send_ret; + struct lttng_ht_node_ulong *node; + struct lttng_ht_iter iter; + + DBG("Close stream received"); + + if (!session || session->version_check_done == 0) { + ERR("Trying to close a stream before version check"); + ret = -1; + goto end_no_session; + } + + ret = cmd->sock->ops->recvmsg(cmd->sock, &stream_info, + sizeof(struct lttcomm_relayd_close_stream), MSG_WAITALL); + if (ret < sizeof(struct lttcomm_relayd_close_stream)) { + ERR("Relay didn't receive valid add_stream struct size : %d", ret); + ret = -1; + goto end_no_session; + } + + rcu_read_lock(); + lttng_ht_lookup(streams_ht, + (void *)((unsigned long) be64toh(stream_info.stream_id)), + &iter); + node = lttng_ht_iter_get_node_ulong(&iter); + if (node == NULL) { + DBG("Relay stream %" PRIu64 " not found", be64toh(stream_info.stream_id)); + ret = -1; + goto end_unlock; + } + + stream = caa_container_of(node, struct relay_stream, stream_n); + if (!stream) { + ret = -1; + goto end_unlock; + } + + stream->last_net_seq_num = be64toh(stream_info.last_net_seq_num); + stream->close_flag = 1; + + if (close_stream_check(stream)) { + int delret; + + close(stream->fd); + delret = lttng_ht_del(streams_ht, &iter); + assert(!delret); + call_rcu(&stream->rcu_node, + deferred_free_stream); + DBG("Closed tracefile %d from close stream", stream->fd); + } + +end_unlock: + rcu_read_unlock(); + + if (ret < 0) { + reply.ret_code = htobe32(LTTNG_ERR_UNK); + } else { + reply.ret_code = htobe32(LTTNG_OK); + } + send_ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, + sizeof(struct lttcomm_relayd_generic_reply), 0); + if (send_ret < 0) { + ERR("Relay sending stream id"); + } + +end_no_session: + return ret; +} + /* * relay_unknown_command: send -1 if received unknown command */ @@ -988,7 +1087,7 @@ void relay_unknown_command(struct relay_command *cmd) struct lttcomm_relayd_generic_reply reply; int ret; - reply.ret_code = htobe32(LTTCOMM_ERR); + reply.ret_code = htobe32(LTTNG_ERR_UNK); ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(struct lttcomm_relayd_generic_reply), 0); if (ret < 0) { @@ -1004,13 +1103,13 @@ static int relay_start(struct lttcomm_relayd_hdr *recv_hdr, struct relay_command *cmd) { - int ret = htobe32(LTTCOMM_OK); + int ret = htobe32(LTTNG_OK); struct lttcomm_relayd_generic_reply reply; struct relay_session *session = cmd->session; if (!session) { DBG("Trying to start the streaming without a session established"); - ret = htobe32(LTTCOMM_ERR); + ret = htobe32(LTTNG_ERR_UNK); } reply.ret_code = ret; @@ -1040,7 +1139,7 @@ struct relay_stream *relay_stream_from_stream_id(uint64_t stream_id, &iter); node = lttng_ht_iter_get_node_ulong(&iter); if (node == NULL) { - DBG("Relay stream %lu not found", stream_id); + DBG("Relay stream %" PRIu64 " not found", stream_id); ret = NULL; goto end; } @@ -1051,6 +1150,36 @@ end: return ret; } +/* + * Append padding to the file pointed by the file descriptor fd. + */ +static int write_padding_to_file(int fd, uint32_t size) +{ + int ret = 0; + char *zeros; + + if (size == 0) { + goto end; + } + + zeros = zmalloc(size); + if (zeros == NULL) { + PERROR("zmalloc zeros for padding"); + ret = -1; + goto end; + } + + do { + ret = write(fd, zeros, size); + } while (ret < 0 && errno == EINTR); + if (ret < 0) { + PERROR("write padding to file"); + } + +end: + return ret; +} + /* * relay_recv_metadata: receive the metada for the session. */ @@ -1058,7 +1187,7 @@ static int relay_recv_metadata(struct lttcomm_relayd_hdr *recv_hdr, struct relay_command *cmd, struct lttng_ht *streams_ht) { - int ret = htobe32(LTTCOMM_OK); + int ret = htobe32(LTTNG_OK); struct relay_session *session = cmd->session; struct lttcomm_relayd_metadata_payload *metadata_struct; struct relay_stream *metadata_stream; @@ -1070,8 +1199,14 @@ int relay_recv_metadata(struct lttcomm_relayd_hdr *recv_hdr, goto end; } - data_size = be64toh(recv_hdr->data_size); - payload_size = data_size - sizeof(uint64_t); + data_size = payload_size = be64toh(recv_hdr->data_size); + if (data_size < sizeof(struct lttcomm_relayd_metadata_payload)) { + ERR("Incorrect data size"); + ret = -1; + goto end; + } + payload_size -= sizeof(struct lttcomm_relayd_metadata_payload); + if (data_buffer_size < data_size) { data_buffer = realloc(data_buffer, data_size); if (!data_buffer) { @@ -1082,8 +1217,9 @@ int relay_recv_metadata(struct lttcomm_relayd_hdr *recv_hdr, data_buffer_size = data_size; } memset(data_buffer, 0, data_size); - DBG2("Relay receiving metadata, waiting for %lu bytes", data_size); - ret = cmd->sock->ops->recvmsg(cmd->sock, data_buffer, data_size, MSG_WAITALL); + DBG2("Relay receiving metadata, waiting for %" PRIu64 " bytes", data_size); + ret = cmd->sock->ops->recvmsg(cmd->sock, data_buffer, data_size, + MSG_WAITALL); if (ret < 0 || ret != data_size) { ret = -1; ERR("Relay didn't receive the whole metadata"); @@ -1103,11 +1239,18 @@ int relay_recv_metadata(struct lttcomm_relayd_hdr *recv_hdr, ret = write(metadata_stream->fd, metadata_struct->payload, payload_size); } while (ret < 0 && errno == EINTR); - if (ret < (payload_size)) { + if (ret < payload_size) { ERR("Relay error writing metadata on file"); ret = -1; goto end_unlock; } + + ret = write_padding_to_file(metadata_stream->fd, + be32toh(metadata_struct->padding_size)); + if (ret < 0) { + goto end_unlock; + } + DBG2("Relay metadata written"); end_unlock: @@ -1123,9 +1266,9 @@ static int relay_send_version(struct lttcomm_relayd_hdr *recv_hdr, struct relay_command *cmd) { - int ret = htobe32(LTTCOMM_OK); + int ret; struct lttcomm_relayd_version reply; - struct relay_session *session = NULL; + struct relay_session *session; if (cmd->session == NULL) { session = zmalloc(sizeof(struct relay_session)); @@ -1135,12 +1278,19 @@ int relay_send_version(struct lttcomm_relayd_hdr *recv_hdr, goto end; } session->id = ++last_relay_session_id; - DBG("Created session %lu", session->id); + DBG("Created session %" PRIu64, session->id); cmd->session = session; + } else { + session = cmd->session; } session->version_check_done = 1; - sscanf(VERSION, "%u.%u", &reply.major, &reply.minor); + ret = sscanf(VERSION, "%u.%u", &reply.major, &reply.minor); + if (ret < 2) { + ERR("Error in scanning version"); + ret = -1; + goto end; + } reply.major = htobe32(reply.major); reply.minor = htobe32(reply.minor); ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, @@ -1148,12 +1298,109 @@ int relay_send_version(struct lttcomm_relayd_hdr *recv_hdr, if (ret < 0) { ERR("Relay sending version"); } - DBG("Version check done"); + DBG("Version check done (%u.%u)", be32toh(reply.major), + be32toh(reply.minor)); end: return ret; } +/* + * Check for data availability for a given stream id from the session daemon. + */ +static +int relay_data_available(struct lttcomm_relayd_hdr *recv_hdr, + struct relay_command *cmd, struct lttng_ht *streams_ht) +{ + struct relay_session *session = cmd->session; + struct lttcomm_relayd_data_available msg; + struct lttcomm_relayd_generic_reply reply; + struct relay_stream *stream; + int ret; + struct lttng_ht_node_ulong *node; + struct lttng_ht_iter iter; + uint64_t last_net_seq_num, stream_id; + + DBG("Data available command received"); + + if (!session || session->version_check_done == 0) { + ERR("Trying to check for data before version check"); + ret = -1; + goto end_no_session; + } + + ret = cmd->sock->ops->recvmsg(cmd->sock, &msg, sizeof(msg), MSG_WAITALL); + if (ret < sizeof(msg)) { + ERR("Relay didn't receive valid data_available struct size : %d", ret); + ret = -1; + goto end_no_session; + } + + stream_id = be64toh(msg.stream_id); + last_net_seq_num = be64toh(msg.last_net_seq_num); + + rcu_read_lock(); + lttng_ht_lookup(streams_ht, (void *)((unsigned long) stream_id), &iter); + node = lttng_ht_iter_get_node_ulong(&iter); + if (node == NULL) { + DBG("Relay stream %" PRIu64 " not found", stream_id); + ret = -1; + goto end_unlock; + } + + stream = caa_container_of(node, struct relay_stream, stream_n); + assert(stream); + + DBG("Data available for stream id %" PRIu64 " prev_seq %" PRIu64 + " and last_seq %" PRIu64, stream_id, stream->prev_seq, + last_net_seq_num); + + if (stream->prev_seq == -1UL || stream->prev_seq <= last_net_seq_num) { + /* Data has in fact been written and is available */ + ret = 1; + } else { + /* Data still being streamed. */ + ret = 0; + } + +end_unlock: + rcu_read_unlock(); + + reply.ret_code = htobe32(ret); + ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0); + if (ret < 0) { + ERR("Relay data available ret code failed"); + } + +end_no_session: + return ret; +} + +/* + * Wait for the control socket to reach a quiescent state. + * + * Note that for now, when receiving this command from the session daemon, this + * means that every subsequent commands or data received on the control socket + * has been handled. So, this is why we simply return OK here. + */ +static +int relay_quiescent_control(struct lttcomm_relayd_hdr *recv_hdr, + struct relay_command *cmd) +{ + int ret; + struct lttcomm_relayd_generic_reply reply; + + DBG("Checking quiescent state on control socket"); + + reply.ret_code = htobe32(LTTNG_OK); + ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0); + if (ret < 0) { + ERR("Relay data available ret code failed"); + } + + return ret; +} + /* * relay_process_control: Process the commands received on the control socket */ @@ -1181,6 +1428,15 @@ int relay_process_control(struct lttcomm_relayd_hdr *recv_hdr, case RELAYD_VERSION: ret = relay_send_version(recv_hdr, cmd); break; + case RELAYD_CLOSE_STREAM: + ret = relay_close_stream(recv_hdr, cmd, streams_ht); + break; + case RELAYD_DATA_AVAILABLE: + ret = relay_data_available(recv_hdr, cmd, streams_ht); + break; + case RELAYD_QUIESCENT_CONTROL: + ret = relay_quiescent_control(recv_hdr, cmd); + break; case RELAYD_UPDATE_SYNC_INFO: default: ERR("Received unknown command (%u)", be32toh(recv_hdr->cmd)); @@ -1203,6 +1459,7 @@ int relay_process_data(struct relay_command *cmd, struct lttng_ht *streams_ht) struct relay_stream *stream; struct lttcomm_relayd_data_hdr data_hdr; uint64_t stream_id; + uint64_t net_seq_num; uint32_t data_size; ret = cmd->sock->ops->recvmsg(cmd->sock, &data_hdr, @@ -1234,6 +1491,10 @@ int relay_process_data(struct relay_command *cmd, struct lttng_ht *streams_ht) } memset(data_buffer, 0, data_size); + net_seq_num = be64toh(data_hdr.net_seq_num); + + DBG3("Receiving data of size %u for stream id %" PRIu64 " seqnum %" PRIu64, + data_size, stream_id, net_seq_num); ret = cmd->sock->ops->recvmsg(cmd->sock, data_buffer, data_size, MSG_WAITALL); if (ret <= 0) { ret = -1; @@ -1248,7 +1509,29 @@ int relay_process_data(struct relay_command *cmd, struct lttng_ht *streams_ht) ret = -1; goto end_unlock; } - DBG2("Relay wrote %d bytes to tracefile for stream id %lu", ret, stream->stream_handle); + + DBG2("Relay wrote %d bytes to tracefile for stream id %" PRIu64, + ret, stream->stream_handle); + + ret = write_padding_to_file(stream->fd, be32toh(data_hdr.padding_size)); + if (ret < 0) { + goto end_unlock; + } + + stream->prev_seq = net_seq_num; + + /* Check if we need to close the FD */ + if (close_stream_check(stream)) { + struct lttng_ht_iter iter; + + close(stream->fd); + iter.iter.node = &stream->stream_n.node; + ret = lttng_ht_del(streams_ht, &iter); + assert(!ret); + call_rcu(&stream->rcu_node, + deferred_free_stream); + DBG("Closed tracefile %d after recv data", stream->fd); + } end_unlock: rcu_read_unlock(); @@ -1282,7 +1565,7 @@ int relay_add_connection(int fd, struct lttng_poll_event *events, goto error; } ret = read(fd, relay_connection, sizeof(struct relay_command)); - if (ret < 0 || ret < sizeof(relay_connection)) { + if (ret < 0 || ret < sizeof(struct relay_command)) { PERROR("read relay cmd pipe"); goto error_read; } @@ -1377,6 +1660,7 @@ void *relay_thread_worker(void *data) /* Infinite blocking call, waiting for transmission */ restart: + DBG3("Relayd worker thread polling..."); ret = lttng_poll_wait(&events, -1); if (ret < 0) { /* @@ -1454,7 +1738,7 @@ void *relay_thread_worker(void *data) DBG("Control connection closed with %d", pollfd); } else { if (relay_connection->session) { - DBG2("Relay worker receiving data for session : %lu", + DBG2("Relay worker receiving data for session : %" PRIu64, relay_connection->session->id); } ret = relay_process_control(&recv_hdr,