X-Git-Url: https://git.lttng.org/?p=lttng-tools.git;a=blobdiff_plain;f=src%2Fbin%2Flttng-relayd%2Fmain.c;h=2a7152b892192f7e683bf881b2b106af0a2da36c;hp=ae631c70a25afc3407736ea8478f782b2c8793c7;hb=7c5aef6226a4752f3a4e60cd0b52c741dced395e;hpb=173af62f4804133d4a7f45e34b6f72126f3eca5f diff --git a/src/bin/lttng-relayd/main.c b/src/bin/lttng-relayd/main.c index ae631c70a..2a7152b89 100644 --- a/src/bin/lttng-relayd/main.c +++ b/src/bin/lttng-relayd/main.c @@ -239,8 +239,8 @@ void cleanup(void) /* Close thread quit pipes */ utils_close_pipe(thread_quit_pipe); - /* Close relay cmd pipes */ - utils_close_pipe(relay_cmd_pipe); + uri_free(control_uri); + uri_free(data_uri); } /* @@ -472,12 +472,6 @@ void *relay_thread_listener(void *data) struct lttng_poll_event events; struct lttcomm_sock *control_sock, *data_sock; - /* - * Get allocated in this thread, enqueued to a global queue, dequeued and - * freed in the worker thread. - */ - struct relay_command *relay_cmd = NULL; - DBG("[thread] Relay listener started"); control_sock = relay_init_sock(control_uri); @@ -544,7 +538,13 @@ restart: ERR("socket poll error"); goto error; } else if (revents & LPOLLIN) { - struct lttcomm_sock *newsock = NULL; + /* + * Get allocated in this thread, + * enqueued to a global queue, dequeued + * and freed in the worker thread. + */ + struct relay_command *relay_cmd; + struct lttcomm_sock *newsock; relay_cmd = zmalloc(sizeof(struct relay_command)); if (relay_cmd == NULL) { @@ -554,16 +554,19 @@ restart: if (pollfd == data_sock->fd) { newsock = data_sock->ops->accept(data_sock); - if (newsock < 0) { + if (!newsock) { PERROR("accepting data sock"); + free(relay_cmd); goto error; } relay_cmd->type = RELAY_DATA; DBG("Relay data connection accepted, socket %d", newsock->fd); - } else if (pollfd == control_sock->fd) { + } else { + assert(pollfd == control_sock->fd); newsock = control_sock->ops->accept(control_sock); - if (newsock < 0) { + if (!newsock) { PERROR("accepting control sock"); + free(relay_cmd); goto error; } relay_cmd->type = RELAY_CONTROL; @@ -573,6 +576,8 @@ restart: &val, sizeof(int)); if (ret < 0) { PERROR("setsockopt inet"); + lttcomm_destroy_sock(newsock); + free(relay_cmd); goto error; } relay_cmd->sock = newsock; @@ -878,8 +883,9 @@ void relay_delete_session(struct relay_command *cmd, struct lttng_ht *streams_ht return; } - DBG("Relay deleting session %lu", cmd->session->id); - free(cmd->session->sock); + DBG("Relay deleting session %" PRIu64, cmd->session->id); + + lttcomm_destroy_sock(cmd->session->sock); rcu_read_lock(); cds_lfht_for_each_entry(streams_ht->ht, &iter.iter, node, node) { @@ -888,7 +894,10 @@ void relay_delete_session(struct relay_command *cmd, struct lttng_ht *streams_ht stream = caa_container_of(node, struct relay_stream, stream_n); if (stream->session == cmd->session) { - close(stream->fd); + ret = close(stream->fd); + if (ret < 0) { + PERROR("close stream fd on delete session"); + } ret = lttng_ht_del(streams_ht, &iter); assert(!ret); call_rcu(&stream->rcu_node, @@ -897,6 +906,8 @@ void relay_delete_session(struct relay_command *cmd, struct lttng_ht *streams_ht } } rcu_read_unlock(); + + free(cmd->session); } /* @@ -921,7 +932,7 @@ int relay_add_stream(struct lttcomm_relayd_hdr *recv_hdr, /* FIXME : use data_size for something ? */ ret = cmd->sock->ops->recvmsg(cmd->sock, &stream_info, - sizeof(struct lttcomm_relayd_add_stream), MSG_WAITALL); + sizeof(struct lttcomm_relayd_add_stream), 0); if (ret < sizeof(struct lttcomm_relayd_add_stream)) { ERR("Relay didn't receive valid add_stream struct size : %d", ret); ret = -1; @@ -977,9 +988,9 @@ end: free(root_path); /* send the session id to the client or a negative return code on error */ if (ret < 0) { - reply.ret_code = htobe32(LTTCOMM_ERR); + reply.ret_code = htobe32(LTTNG_ERR_UNK); } else { - reply.ret_code = htobe32(LTTCOMM_OK); + reply.ret_code = htobe32(LTTNG_OK); } reply.handle = htobe64(stream->stream_handle); send_ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, @@ -1017,7 +1028,7 @@ int relay_close_stream(struct lttcomm_relayd_hdr *recv_hdr, } ret = cmd->sock->ops->recvmsg(cmd->sock, &stream_info, - sizeof(struct lttcomm_relayd_close_stream), MSG_WAITALL); + sizeof(struct lttcomm_relayd_close_stream), 0); if (ret < sizeof(struct lttcomm_relayd_close_stream)) { ERR("Relay didn't receive valid add_stream struct size : %d", ret); ret = -1; @@ -1030,7 +1041,7 @@ int relay_close_stream(struct lttcomm_relayd_hdr *recv_hdr, &iter); node = lttng_ht_iter_get_node_ulong(&iter); if (node == NULL) { - DBG("Relay stream %lu not found", be64toh(stream_info.stream_id)); + DBG("Relay stream %" PRIu64 " not found", be64toh(stream_info.stream_id)); ret = -1; goto end_unlock; } @@ -1041,12 +1052,16 @@ int relay_close_stream(struct lttcomm_relayd_hdr *recv_hdr, goto end_unlock; } + stream->last_net_seq_num = be64toh(stream_info.last_net_seq_num); stream->close_flag = 1; if (close_stream_check(stream)) { int delret; - close(stream->fd); + delret = close(stream->fd); + if (delret < 0) { + PERROR("close stream"); + } delret = lttng_ht_del(streams_ht, &iter); assert(!delret); call_rcu(&stream->rcu_node, @@ -1058,9 +1073,9 @@ end_unlock: rcu_read_unlock(); if (ret < 0) { - reply.ret_code = htobe32(LTTCOMM_ERR); + reply.ret_code = htobe32(LTTNG_ERR_UNK); } else { - reply.ret_code = htobe32(LTTCOMM_OK); + reply.ret_code = htobe32(LTTNG_OK); } send_ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(struct lttcomm_relayd_generic_reply), 0); @@ -1081,7 +1096,7 @@ void relay_unknown_command(struct relay_command *cmd) struct lttcomm_relayd_generic_reply reply; int ret; - reply.ret_code = htobe32(LTTCOMM_ERR); + reply.ret_code = htobe32(LTTNG_ERR_UNK); ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(struct lttcomm_relayd_generic_reply), 0); if (ret < 0) { @@ -1097,13 +1112,13 @@ static int relay_start(struct lttcomm_relayd_hdr *recv_hdr, struct relay_command *cmd) { - int ret = htobe32(LTTCOMM_OK); + int ret = htobe32(LTTNG_OK); struct lttcomm_relayd_generic_reply reply; struct relay_session *session = cmd->session; if (!session) { DBG("Trying to start the streaming without a session established"); - ret = htobe32(LTTCOMM_ERR); + ret = htobe32(LTTNG_ERR_UNK); } reply.ret_code = ret; @@ -1133,7 +1148,7 @@ struct relay_stream *relay_stream_from_stream_id(uint64_t stream_id, &iter); node = lttng_ht_iter_get_node_ulong(&iter); if (node == NULL) { - DBG("Relay stream %lu not found", stream_id); + DBG("Relay stream %" PRIu64 " not found", stream_id); ret = NULL; goto end; } @@ -1144,6 +1159,38 @@ end: return ret; } +/* + * Append padding to the file pointed by the file descriptor fd. + */ +static int write_padding_to_file(int fd, uint32_t size) +{ + int ret = 0; + char *zeros; + + if (size == 0) { + goto end; + } + + zeros = zmalloc(size); + if (zeros == NULL) { + PERROR("zmalloc zeros for padding"); + ret = -1; + goto end; + } + + do { + ret = write(fd, zeros, size); + } while (ret < 0 && errno == EINTR); + if (ret < 0) { + PERROR("write padding to file"); + } + + free(zeros); + +end: + return ret; +} + /* * relay_recv_metadata: receive the metada for the session. */ @@ -1151,7 +1198,7 @@ static int relay_recv_metadata(struct lttcomm_relayd_hdr *recv_hdr, struct relay_command *cmd, struct lttng_ht *streams_ht) { - int ret = htobe32(LTTCOMM_OK); + int ret = htobe32(LTTNG_OK); struct relay_session *session = cmd->session; struct lttcomm_relayd_metadata_payload *metadata_struct; struct relay_stream *metadata_stream; @@ -1172,18 +1219,20 @@ int relay_recv_metadata(struct lttcomm_relayd_hdr *recv_hdr, payload_size -= sizeof(struct lttcomm_relayd_metadata_payload); if (data_buffer_size < data_size) { + /* In case the realloc fails, we can free the memory */ + char *tmp_data_ptr = data_buffer; data_buffer = realloc(data_buffer, data_size); if (!data_buffer) { ERR("Allocating data buffer"); + free(tmp_data_ptr); ret = -1; goto end; } data_buffer_size = data_size; } memset(data_buffer, 0, data_size); - DBG2("Relay receiving metadata, waiting for %lu bytes", data_size); - ret = cmd->sock->ops->recvmsg(cmd->sock, data_buffer, data_size, - MSG_WAITALL); + DBG2("Relay receiving metadata, waiting for %" PRIu64 " bytes", data_size); + ret = cmd->sock->ops->recvmsg(cmd->sock, data_buffer, data_size, 0); if (ret < 0 || ret != data_size) { ret = -1; ERR("Relay didn't receive the whole metadata"); @@ -1208,6 +1257,13 @@ int relay_recv_metadata(struct lttcomm_relayd_hdr *recv_hdr, ret = -1; goto end_unlock; } + + ret = write_padding_to_file(metadata_stream->fd, + be32toh(metadata_struct->padding_size)); + if (ret < 0) { + goto end_unlock; + } + DBG2("Relay metadata written"); end_unlock: @@ -1223,9 +1279,9 @@ static int relay_send_version(struct lttcomm_relayd_hdr *recv_hdr, struct relay_command *cmd) { - int ret = htobe32(LTTCOMM_OK); - struct lttcomm_relayd_version reply; - struct relay_session *session = NULL; + int ret; + struct lttcomm_relayd_version reply, msg; + struct relay_session *session; if (cmd->session == NULL) { session = zmalloc(sizeof(struct relay_session)); @@ -1235,12 +1291,34 @@ int relay_send_version(struct lttcomm_relayd_hdr *recv_hdr, goto end; } session->id = ++last_relay_session_id; - DBG("Created session %lu", session->id); + DBG("Created session %" PRIu64, session->id); cmd->session = session; + } else { + session = cmd->session; } session->version_check_done = 1; - sscanf(VERSION, "%u.%u", &reply.major, &reply.minor); + /* Get version from the other side. */ + ret = cmd->sock->ops->recvmsg(cmd->sock, &msg, sizeof(msg), 0); + if (ret < 0 || ret != sizeof(msg)) { + ret = -1; + ERR("Relay failed to receive the version values."); + goto end; + } + + /* + * For now, we just ignore the received version but after 2.1 stable + * release, a check must be done to see if we either adapt to the other + * side version (which MUST be lower than us) or keep the latest data + * structure considering that the other side will adapt. + */ + + ret = sscanf(VERSION, "%u.%u", &reply.major, &reply.minor); + if (ret < 2) { + ERR("Error in scanning version"); + ret = -1; + goto end; + } reply.major = htobe32(reply.major); reply.minor = htobe32(reply.minor); ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, @@ -1248,12 +1326,110 @@ int relay_send_version(struct lttcomm_relayd_hdr *recv_hdr, if (ret < 0) { ERR("Relay sending version"); } - DBG("Version check done"); + DBG("Version check done (%u.%u)", be32toh(reply.major), + be32toh(reply.minor)); end: return ret; } +/* + * Check for data pending for a given stream id from the session daemon. + */ +static +int relay_data_pending(struct lttcomm_relayd_hdr *recv_hdr, + struct relay_command *cmd, struct lttng_ht *streams_ht) +{ + struct relay_session *session = cmd->session; + struct lttcomm_relayd_data_pending msg; + struct lttcomm_relayd_generic_reply reply; + struct relay_stream *stream; + int ret; + struct lttng_ht_node_ulong *node; + struct lttng_ht_iter iter; + uint64_t last_net_seq_num, stream_id; + + DBG("Data pending command received"); + + if (!session || session->version_check_done == 0) { + ERR("Trying to check for data before version check"); + ret = -1; + goto end_no_session; + } + + ret = cmd->sock->ops->recvmsg(cmd->sock, &msg, sizeof(msg), 0); + if (ret < sizeof(msg)) { + ERR("Relay didn't receive valid data_pending struct size : %d", ret); + ret = -1; + goto end_no_session; + } + + stream_id = be64toh(msg.stream_id); + last_net_seq_num = be64toh(msg.last_net_seq_num); + + rcu_read_lock(); + lttng_ht_lookup(streams_ht, (void *)((unsigned long) stream_id), &iter); + node = lttng_ht_iter_get_node_ulong(&iter); + if (node == NULL) { + DBG("Relay stream %" PRIu64 " not found", stream_id); + ret = -1; + goto end_unlock; + } + + stream = caa_container_of(node, struct relay_stream, stream_n); + assert(stream); + + DBG("Data pending for stream id %" PRIu64 " prev_seq %" PRIu64 + " and last_seq %" PRIu64, stream_id, stream->prev_seq, + last_net_seq_num); + + /* Avoid wrapping issue */ + if (((int64_t) (stream->prev_seq - last_net_seq_num)) <= 0) { + /* Data has in fact been written and is NOT pending */ + ret = 0; + } else { + /* Data still being streamed thus pending */ + ret = 1; + } + +end_unlock: + rcu_read_unlock(); + + reply.ret_code = htobe32(ret); + ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0); + if (ret < 0) { + ERR("Relay data pending ret code failed"); + } + +end_no_session: + return ret; +} + +/* + * Wait for the control socket to reach a quiescent state. + * + * Note that for now, when receiving this command from the session daemon, this + * means that every subsequent commands or data received on the control socket + * has been handled. So, this is why we simply return OK here. + */ +static +int relay_quiescent_control(struct lttcomm_relayd_hdr *recv_hdr, + struct relay_command *cmd) +{ + int ret; + struct lttcomm_relayd_generic_reply reply; + + DBG("Checking quiescent state on control socket"); + + reply.ret_code = htobe32(LTTNG_OK); + ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0); + if (ret < 0) { + ERR("Relay data quiescent control ret code failed"); + } + + return ret; +} + /* * relay_process_control: Process the commands received on the control socket */ @@ -1284,6 +1460,12 @@ int relay_process_control(struct lttcomm_relayd_hdr *recv_hdr, case RELAYD_CLOSE_STREAM: ret = relay_close_stream(recv_hdr, cmd, streams_ht); break; + case RELAYD_DATA_PENDING: + ret = relay_data_pending(recv_hdr, cmd, streams_ht); + break; + case RELAYD_QUIESCENT_CONTROL: + ret = relay_quiescent_control(recv_hdr, cmd); + break; case RELAYD_UPDATE_SYNC_INFO: default: ERR("Received unknown command (%u)", be32toh(recv_hdr->cmd)); @@ -1310,7 +1492,7 @@ int relay_process_data(struct relay_command *cmd, struct lttng_ht *streams_ht) uint32_t data_size; ret = cmd->sock->ops->recvmsg(cmd->sock, &data_hdr, - sizeof(struct lttcomm_relayd_data_hdr), MSG_WAITALL); + sizeof(struct lttcomm_relayd_data_hdr), 0); if (ret <= 0) { ERR("Connections seems to be closed"); ret = -1; @@ -1328,9 +1510,11 @@ int relay_process_data(struct relay_command *cmd, struct lttng_ht *streams_ht) data_size = be32toh(data_hdr.data_size); if (data_buffer_size < data_size) { + char *tmp_data_ptr = data_buffer; data_buffer = realloc(data_buffer, data_size); if (!data_buffer) { ERR("Allocating data buffer"); + free(tmp_data_ptr); ret = -1; goto end_unlock; } @@ -1340,9 +1524,9 @@ int relay_process_data(struct relay_command *cmd, struct lttng_ht *streams_ht) net_seq_num = be64toh(data_hdr.net_seq_num); - DBG3("Receiving data of size %u for stream id %zu seqnum %" PRIu64, + DBG3("Receiving data of size %u for stream id %" PRIu64 " seqnum %" PRIu64, data_size, stream_id, net_seq_num); - ret = cmd->sock->ops->recvmsg(cmd->sock, data_buffer, data_size, MSG_WAITALL); + ret = cmd->sock->ops->recvmsg(cmd->sock, data_buffer, data_size, 0); if (ret <= 0) { ret = -1; goto end_unlock; @@ -1356,15 +1540,26 @@ int relay_process_data(struct relay_command *cmd, struct lttng_ht *streams_ht) ret = -1; goto end_unlock; } - DBG2("Relay wrote %d bytes to tracefile for stream id %lu", ret, stream->stream_handle); + + DBG2("Relay wrote %d bytes to tracefile for stream id %" PRIu64, + ret, stream->stream_handle); + + ret = write_padding_to_file(stream->fd, be32toh(data_hdr.padding_size)); + if (ret < 0) { + goto end_unlock; + } stream->prev_seq = net_seq_num; /* Check if we need to close the FD */ if (close_stream_check(stream)) { + int cret; struct lttng_ht_iter iter; - close(stream->fd); + cret = close(stream->fd); + if (cret < 0) { + PERROR("close stream process data"); + } iter.iter.node = &stream->stream_n.node; ret = lttng_ht_del(streams_ht, &iter); assert(!ret); @@ -1405,7 +1600,7 @@ int relay_add_connection(int fd, struct lttng_poll_event *events, goto error; } ret = read(fd, relay_connection, sizeof(struct relay_command)); - if (ret < 0 || ret < sizeof(relay_connection)) { + if (ret < 0 || ret < sizeof(struct relay_command)) { PERROR("read relay cmd pipe"); goto error_read; } @@ -1431,6 +1626,8 @@ void deferred_free_connection(struct rcu_head *head) { struct relay_command *relay_connection = caa_container_of(head, struct relay_command, rcu_node); + + lttcomm_destroy_sock(relay_connection->sock); free(relay_connection); } @@ -1446,6 +1643,7 @@ void relay_del_connection(struct lttng_ht *relay_connections_ht, if (relay_connection->type == RELAY_CONTROL) { relay_delete_session(relay_connection, streams_ht); } + call_rcu(&relay_connection->rcu_node, deferred_free_connection); } @@ -1568,7 +1766,7 @@ void *relay_thread_worker(void *data) if (relay_connection->type == RELAY_CONTROL) { ret = relay_connection->sock->ops->recvmsg( relay_connection->sock, &recv_hdr, - sizeof(struct lttcomm_relayd_hdr), MSG_WAITALL); + sizeof(struct lttcomm_relayd_hdr), 0); /* connection closed */ if (ret <= 0) { relay_cleanup_poll_connection(&events, pollfd); @@ -1578,7 +1776,7 @@ void *relay_thread_worker(void *data) DBG("Control connection closed with %d", pollfd); } else { if (relay_connection->session) { - DBG2("Relay worker receiving data for session : %lu", + DBG2("Relay worker receiving data for session : %" PRIu64, relay_connection->session->id); } ret = relay_process_control(&recv_hdr, @@ -1636,6 +1834,8 @@ error_poll_create: streams_ht_error: lttng_ht_destroy(relay_connections_ht); relay_connections_ht_error: + /* Close relay cmd pipes */ + utils_close_pipe(relay_cmd_pipe); if (err) { DBG("Thread exited with error"); }