X-Git-Url: https://git.lttng.org/?a=blobdiff_plain;f=src%2Fbin%2Flttng-relayd%2Fmain.c;h=11277f29346e99104cfbedd6453ace99a76493f6;hb=e5add6d004793894ef4c7e047bc0f8885763b205;hp=1e2e9050fef2d6d77ca78e9767a2fea759ba99be;hpb=c6db3843828a8fbf08444a2bc4191291a4807936;p=lttng-tools.git diff --git a/src/bin/lttng-relayd/main.c b/src/bin/lttng-relayd/main.c index 1e2e9050f..11277f293 100644 --- a/src/bin/lttng-relayd/main.c +++ b/src/bin/lttng-relayd/main.c @@ -74,6 +74,7 @@ #include "connection.h" #include "tracefile-array.h" #include "tcp_keep_alive.h" +#include "sessiond-trace-chunks.h" static const char *help_msg = #ifdef LTTNG_EMBED_HELP @@ -85,7 +86,7 @@ NULL enum relay_connection_status { RELAY_CONNECTION_STATUS_OK, - /* An error occured while processing an event on the connection. */ + /* An error occurred while processing an event on the connection. */ RELAY_CONNECTION_STATUS_ERROR, /* Connection closed/shutdown cleanly. */ RELAY_CONNECTION_STATUS_CLOSED, @@ -167,6 +168,8 @@ struct lttng_ht *sessions_ht; /* Relayd health monitoring */ struct health_app *health_relayd; +struct sessiond_trace_chunk_registry *sessiond_trace_chunk_registry; + static struct option long_options[] = { { "control-port", 1, 0, 'C', }, { "data-port", 1, 0, 'D', }, @@ -858,14 +861,6 @@ restart: revents = LTTNG_POLL_GETEV(&events, i); pollfd = LTTNG_POLL_GETFD(&events, i); - if (!revents) { - /* - * No activity for this FD (poll - * implementation). - */ - continue; - } - /* Thread quit pipe has been closed. Killing thread. */ ret = check_thread_quit_pipe(pollfd, revents); if (ret) { @@ -1099,17 +1094,16 @@ static int relay_create_session(const struct lttcomm_relayd_hdr *recv_hdr, { int ret = 0; ssize_t send_ret; - struct relay_session *session; - struct lttcomm_relayd_status_session reply; - char session_name[LTTNG_NAME_MAX]; - char hostname[LTTNG_HOST_NAME_MAX]; + struct relay_session *session = NULL; + struct lttcomm_relayd_status_session reply = {}; + char session_name[LTTNG_NAME_MAX] = {}; + char hostname[LTTNG_HOST_NAME_MAX] = {}; uint32_t live_timer = 0; bool snapshot = false; - - memset(session_name, 0, LTTNG_NAME_MAX); - memset(hostname, 0, LTTNG_HOST_NAME_MAX); - - memset(&reply, 0, sizeof(reply)); + /* Left nil for peers < 2.11. */ + lttng_uuid sessiond_uuid = {}; + LTTNG_OPTIONAL(uint64_t) id_sessiond = {}; + LTTNG_OPTIONAL(uint64_t) current_chunk_id = {}; if (conn->minor < 4) { /* From 2.1 to 2.3 */ @@ -1119,9 +1113,22 @@ static int relay_create_session(const struct lttcomm_relayd_hdr *recv_hdr, ret = cmd_create_session_2_4(payload, session_name, hostname, &live_timer, &snapshot); } else { + bool has_current_chunk; + /* From 2.11 to ... */ ret = cmd_create_session_2_11(payload, session_name, - hostname, &live_timer, &snapshot); + hostname, &live_timer, &snapshot, + &id_sessiond.value, sessiond_uuid, + &has_current_chunk, + ¤t_chunk_id.value); + if (lttng_uuid_is_nil(sessiond_uuid)) { + /* The nil UUID is reserved for pre-2.11 clients. */ + ERR("Illegal nil UUID announced by peer in create session command"); + ret = -1; + goto send_reply; + } + id_sessiond.is_set = true; + current_chunk_id.is_set = has_current_chunk; } if (ret < 0) { @@ -1129,7 +1136,10 @@ static int relay_create_session(const struct lttcomm_relayd_hdr *recv_hdr, } session = session_create(session_name, hostname, live_timer, - snapshot, conn->major, conn->minor); + snapshot, sessiond_uuid, + id_sessiond.is_set ? &id_sessiond.value : NULL, + current_chunk_id.is_set ? ¤t_chunk_id.value : NULL, + conn->major, conn->minor); if (!session) { ret = -1; goto send_reply; @@ -1153,7 +1163,9 @@ send_reply: send_ret); ret = -1; } - + if (ret < 0 && session) { + session_put(session); + } return ret; } @@ -1640,7 +1652,7 @@ int rotate_truncate_stream(struct relay_stream *stream) /* * Rewind the current tracefile to the position at which the rotation - * should have occured. + * should have occurred. */ lseek_ret = lseek(stream->stream_fd->fd, stream->pos_after_last_complete_data_index, SEEK_SET); @@ -2522,8 +2534,6 @@ static int relay_rotate_session_stream(const struct lttcomm_relayd_hdr *recv_hdr size_t path_len; struct lttng_buffer_view new_path_view; - DBG("Rotate stream received"); - if (!session || !conn->version_check_done) { ERR("Trying to rotate a stream before version check"); ret = -1; @@ -2650,362 +2660,196 @@ end_no_reply: return ret; } -/* - * relay_mkdir: Create a folder on the disk. - */ -static int relay_mkdir(const struct lttcomm_relayd_hdr *recv_hdr, - struct relay_connection *conn, - const struct lttng_buffer_view *payload) +static int init_session_output_directory_handle(struct relay_session *session, + struct lttng_directory_handle *handle) { int ret; - struct relay_session *session = conn->session; - struct lttcomm_relayd_mkdir path_info_header; - struct lttcomm_relayd_generic_reply reply; - char *path = NULL; - size_t header_len; - ssize_t send_ret; - struct lttng_buffer_view path_view; - - if (!session || !conn->version_check_done) { - ERR("Trying to create a directory before version check"); - ret = -1; - goto end_no_session; - } - - if (session->major == 2 && session->minor < 11) { - /* - * This client is not supposed to use this command since - * it predates its introduction. - */ - ERR("relay_mkdir command is unsupported before LTTng 2.11"); - ret = -1; - goto end_no_session; - } + /* hostname/session_name */ + char *session_directory = NULL; + /* + * base path + session_directory + * e.g. /home/user/lttng-traces/hostname/session_name + */ + char *full_session_path = NULL; - header_len = sizeof(path_info_header); - if (payload->size < header_len) { - ERR("Unexpected payload size in \"relay_mkdir\": expected >= %zu bytes, got %zu bytes", - header_len, payload->size); - ret = -1; - goto end_no_session; + pthread_mutex_lock(&session->lock); + ret = asprintf(&session_directory, "%s/%s", session->hostname, + session->session_name); + pthread_mutex_unlock(&session->lock); + if (ret < 0) { + PERROR("Failed to format session directory name"); + goto end; } - memcpy(&path_info_header, payload->data, header_len); - - path_info_header.length = be32toh(path_info_header.length); - - if (payload->size < header_len + path_info_header.length) { - ERR("Unexpected payload size in \"relay_mkdir\" including path: expected >= %zu bytes, got %zu bytes", - header_len + path_info_header.length, payload->size); + full_session_path = create_output_path(session_directory); + if (!full_session_path) { ret = -1; - goto end_no_session; - } - - /* Ensure that it fits in local path length. */ - if (path_info_header.length >= LTTNG_PATH_MAX) { - ret = -ENAMETOOLONG; - ERR("Path name argument of mkdir command (%" PRIu32 " bytes) exceeds the maximal length allowed (%d bytes)", - path_info_header.length, LTTNG_PATH_MAX); goto end; } - path_view = lttng_buffer_view_from_view(payload, header_len, - path_info_header.length); - - path = create_output_path(path_view.data); - if (!path) { - ERR("Failed to create output path"); - ret = -1; + ret = utils_mkdir_recursive( + full_session_path, S_IRWXU | S_IRWXG, -1, -1); + if (ret) { + ERR("Failed to create session output path \"%s\"", + full_session_path); goto end; } - DBG("MKDIR command has path \"%s\", changed to \"%s\"", path_view.data, path); - ret = utils_mkdir_recursive(path, S_IRWXU | S_IRWXG, -1, -1); - if (ret < 0) { - ERR("relay creating output directory"); + ret = lttng_directory_handle_init(handle, full_session_path); + if (ret) { goto end; } - - ret = 0; - end: - memset(&reply, 0, sizeof(reply)); - if (ret < 0) { - reply.ret_code = htobe32(LTTNG_ERR_UNK); - } else { - reply.ret_code = htobe32(LTTNG_OK); - } - send_ret = conn->sock->ops->sendmsg(conn->sock, &reply, sizeof(reply), 0); - if (send_ret < (ssize_t) sizeof(reply)) { - ERR("Failed to send \"mkdir\" command reply (ret = %zd)", send_ret); - ret = -1; - } - -end_no_session: - free(path); - return ret; -} - -static int validate_rotate_rename_path_length(const char *path_type, - uint32_t path_length) -{ - int ret = 0; - - if (path_length > LTTNG_PATH_MAX) { - ret = -ENAMETOOLONG; - ERR("rotate rename \"%s\" path name length (%" PRIu32 " bytes) exceeds the allowed size of %i bytes", - path_type, path_length, LTTNG_PATH_MAX); - } else if (path_length == 0) { - ret = -EINVAL; - ERR("rotate rename \"%s\" path name has an illegal length of 0", path_type); - } + free(session_directory); + free(full_session_path); return ret; } /* - * relay_rotate_rename: rename the trace folder after a rotation is - * completed. We are not closing any fd here, just moving the folder, so it - * works even if data is still in-flight. + * relay_create_trace_chunk: create a new trace chunk */ -static int relay_rotate_rename(const struct lttcomm_relayd_hdr *recv_hdr, +static int relay_create_trace_chunk(const struct lttcomm_relayd_hdr *recv_hdr, struct relay_connection *conn, const struct lttng_buffer_view *payload) { - int ret; + int ret = 0; ssize_t send_ret; struct relay_session *session = conn->session; - struct lttcomm_relayd_generic_reply reply; - struct lttcomm_relayd_rotate_rename header; - size_t header_len; - size_t received_paths_size; - char *complete_old_path = NULL, *complete_new_path = NULL; - struct lttng_buffer_view old_path_view; - struct lttng_buffer_view new_path_view; + struct lttcomm_relayd_create_trace_chunk *msg; + struct lttcomm_relayd_generic_reply reply = {}; + struct lttng_buffer_view header_view; + struct lttng_buffer_view chunk_name_view; + struct lttng_trace_chunk *chunk = NULL, *published_chunk = NULL; + enum lttng_error_code reply_code = LTTNG_OK; + enum lttng_trace_chunk_status chunk_status; + struct lttng_directory_handle session_output; if (!session || !conn->version_check_done) { - ERR("Trying to rename a trace folder before version check"); + ERR("Trying to create a trace chunk before version check"); ret = -1; goto end_no_reply; } if (session->major == 2 && session->minor < 11) { - ERR("relay_rotate_rename command is unsupported before LTTng 2.11"); + ERR("Chunk creation command is unsupported before 2.11"); ret = -1; goto end_no_reply; } - header_len = sizeof(header); - if (payload->size < header_len) { - ERR("Unexpected payload size in \"relay_rotate_rename\": expected >= %zu bytes, got %zu bytes", - header_len, payload->size); + header_view = lttng_buffer_view_from_view(payload, 0, sizeof(*msg)); + if (!header_view.data) { + ERR("Failed to receive payload of chunk creation command"); ret = -1; goto end_no_reply; } - memcpy(&header, payload->data, header_len); - - header.old_path_length = be32toh(header.old_path_length); - header.new_path_length = be32toh(header.new_path_length); - received_paths_size = header.old_path_length + header.new_path_length; + /* Convert to host endianness. */ + msg = (typeof(msg)) header_view.data; + msg->chunk_id = be64toh(msg->chunk_id); + msg->creation_timestamp = be64toh(msg->creation_timestamp); + msg->override_name_length = be32toh(msg->override_name_length); - if (payload->size < header_len + received_paths_size) { - ERR("Unexpected payload size in \"relay_rotate_rename\" including paths: expected >= %zu bytes, got %zu bytes", - header_len, payload->size); + chunk = lttng_trace_chunk_create( + msg->chunk_id, msg->creation_timestamp); + if (!chunk) { + ERR("Failed to create trace chunk in trace chunk creation command"); ret = -1; - goto end_no_reply; - } - - /* Ensure the paths don't exceed their allowed size. */ - ret = validate_rotate_rename_path_length("old", header.old_path_length); - if (ret) { - goto end; - } - ret = validate_rotate_rename_path_length("new", header.new_path_length); - if (ret) { + reply_code = LTTNG_ERR_NOMEM; goto end; } - old_path_view = lttng_buffer_view_from_view(payload, header_len, - header.old_path_length); - new_path_view = lttng_buffer_view_from_view(payload, - header_len + header.old_path_length, - header.new_path_length); + if (msg->override_name_length) { + const char *name; - /* Validate that both paths received are NULL terminated. */ - if (old_path_view.data[old_path_view.size - 1] != '\0') { - ERR("relay_rotate_rename command's \"old\" path is invalid (not NULL terminated)"); - ret = -1; - goto end; - } - if (new_path_view.data[new_path_view.size - 1] != '\0') { - ERR("relay_rotate_rename command's \"new\" path is invalid (not NULL terminated)"); - ret = -1; - goto end; - } - - DBG("ROTATE_RENAME command has argument old path = \"%s\", new_path = \"%s\"", - old_path_view.data, new_path_view.data); - complete_old_path = create_output_path(old_path_view.data); - if (!complete_old_path) { - ERR("Failed to build old output path in rotate_rename command"); - ret = -1; - goto end; - } - - complete_new_path = create_output_path(new_path_view.data); - if (!complete_new_path) { - ERR("Failed to build new output path in rotate_rename command"); - ret = -1; - goto end; - } - DBG("Expanded ROTATE_RENAME arguments to old path = \"%s\", new_path = \"%s\"", - complete_old_path, complete_new_path); + chunk_name_view = lttng_buffer_view_from_view(payload, + sizeof(*msg), + msg->override_name_length); + name = chunk_name_view.data; + if (!name || name[msg->override_name_length - 1]) { + ERR("Failed to receive payload of chunk creation command"); + ret = -1; + reply_code = LTTNG_ERR_INVALID; + goto end; + } - ret = utils_mkdir_recursive(complete_new_path, S_IRWXU | S_IRWXG, - -1, -1); - if (ret < 0) { - ERR("Failed to mkdir() rotate_rename's \"new\" output directory at \"%s\"", - complete_new_path); - goto end; + chunk_status = lttng_trace_chunk_override_name( + chunk, chunk_name_view.data); + switch (chunk_status) { + case LTTNG_TRACE_CHUNK_STATUS_OK: + break; + case LTTNG_TRACE_CHUNK_STATUS_INVALID_ARGUMENT: + ERR("Failed to set the name of new trace chunk in trace chunk creation command (invalid name)"); + reply_code = LTTNG_ERR_INVALID; + ret = -1; + goto end; + default: + ERR("Failed to set the name of new trace chunk in trace chunk creation command (unknown error)"); + reply_code = LTTNG_ERR_UNK; + ret = -1; + goto end; + } } - /* - * If a domain has not yet created its channel, the domain-specific - * folder might not exist, but this is not an error. - */ - ret = rename(complete_old_path, complete_new_path); - if (ret < 0 && errno != ENOENT) { - PERROR("Renaming chunk in rotate_rename command from \"%s\" to \"%s\"", - complete_old_path, complete_new_path); + ret = init_session_output_directory_handle( + conn->session, &session_output); + if (ret) { + reply_code = LTTNG_ERR_CREATE_DIR_FAIL; goto end; } - ret = 0; -end: - memset(&reply, 0, sizeof(reply)); - if (ret < 0) { - reply.ret_code = htobe32(LTTNG_ERR_UNK); - } else { - reply.ret_code = htobe32(LTTNG_OK); - } - send_ret = conn->sock->ops->sendmsg(conn->sock, &reply, - sizeof(reply), 0); - if (send_ret < sizeof(reply)) { - ERR("Failed to send \"rotate rename\" command reply (ret = %zd)", - send_ret); + chunk_status = lttng_trace_chunk_set_credentials_current_user(chunk); + if (chunk_status != LTTNG_TRACE_CHUNK_STATUS_OK) { + reply_code = LTTNG_ERR_UNK; ret = -1; + goto end; } -end_no_reply: - free(complete_old_path); - free(complete_new_path); - return ret; -} - -/* - * Check if all the streams in the session have completed the last rotation. - * The chunk_id value is used to distinguish the cases where a stream was - * closed on the consumerd before the rotation started but it still active on - * the relayd, and the case where a stream appeared on the consumerd/relayd - * after the last rotation started (in that case, it is already writing in the - * new chunk folder). - */ -static -int relay_rotate_pending(const struct lttcomm_relayd_hdr *recv_hdr, - struct relay_connection *conn, - const struct lttng_buffer_view *payload) -{ - struct relay_session *session = conn->session; - struct lttcomm_relayd_rotate_pending msg; - struct lttcomm_relayd_rotate_pending_reply reply; - struct lttng_ht_iter iter; - struct relay_stream *stream; - int ret = 0; - ssize_t send_ret; - uint64_t chunk_id; - bool rotate_pending = false; - - DBG("Rotate pending command received"); - - if (!session || !conn->version_check_done) { - ERR("Trying to check for data before version check"); + chunk_status = lttng_trace_chunk_set_as_owner(chunk, &session_output); + if (chunk_status != LTTNG_TRACE_CHUNK_STATUS_OK) { + reply_code = LTTNG_ERR_UNK; ret = -1; - goto end_no_reply; + goto end; } - if (session->major == 2 && session->minor < 11) { - ERR("Unsupported feature before 2.11"); - ret = -1; - goto end_no_reply; - } + published_chunk = sessiond_trace_chunk_registry_publish_chunk( + sessiond_trace_chunk_registry, + conn->session->sessiond_uuid, + conn->session->id, + chunk); + if (!published_chunk) { + char uuid_str[UUID_STR_LEN]; - if (payload->size < sizeof(msg)) { - ERR("Unexpected payload size in \"relay_rotate_pending\": expected >= %zu bytes, got %zu bytes", - sizeof(msg), payload->size); + lttng_uuid_to_str(conn->session->sessiond_uuid, uuid_str); + ERR("Failed to publish chunk: sessiond_uuid = %s, session_id = %" PRIu64 ", chunk_id = %" PRIu64, + uuid_str, + conn->session->id, + msg->chunk_id); ret = -1; - goto end_no_reply; + reply_code = LTTNG_ERR_NOMEM; + goto end; } - memcpy(&msg, payload->data, sizeof(msg)); - - chunk_id = be64toh(msg.chunk_id); - - DBG("Evaluating rotate pending for session \"%s\" and chunk id %" PRIu64, - session->session_name, chunk_id); - - /* - * Iterate over all the streams in the session and check if they are - * still waiting for data to perform their rotation. - */ - rcu_read_lock(); - cds_lfht_for_each_entry(relay_streams_ht->ht, &iter.iter, stream, - node.node) { - if (!stream_get(stream)) { - continue; - } - if (stream->trace->session != session) { - stream_put(stream); - continue; - } - pthread_mutex_lock(&stream->lock); - if (stream->rotate_at_seq_num != -1ULL) { - /* We have not yet performed the rotation. */ - rotate_pending = true; - DBG("Stream %" PRIu64 " is still rotating", - stream->stream_handle); - } else if (stream->current_chunk_id.value < chunk_id) { - /* - * Stream closed on the consumer but still active on the - * relay. - */ - rotate_pending = true; - DBG("Stream %" PRIu64 " did not exist on the consumer " - "when the last rotation started, but is" - "still waiting for data before getting" - "closed", - stream->stream_handle); - } - pthread_mutex_unlock(&stream->lock); - stream_put(stream); - if (rotate_pending) { - goto send_reply; - } - } + pthread_mutex_lock(&conn->session->lock); + lttng_trace_chunk_put(conn->session->current_trace_chunk); + conn->session->current_trace_chunk = published_chunk; + pthread_mutex_unlock(&conn->session->lock); + published_chunk = NULL; -send_reply: - rcu_read_unlock(); - memset(&reply, 0, sizeof(reply)); - reply.generic.ret_code = htobe32((uint32_t) LTTNG_OK); - reply.is_pending = (uint8_t) !!rotate_pending; - send_ret = conn->sock->ops->sendmsg(conn->sock, &reply, - sizeof(reply), 0); +end: + reply.ret_code = htobe32((uint32_t) reply_code); + send_ret = conn->sock->ops->sendmsg(conn->sock, + &reply, + sizeof(struct lttcomm_relayd_generic_reply), + 0); if (send_ret < (ssize_t) sizeof(reply)) { - ERR("Failed to send \"rotate pending\" command reply (ret = %zd)", + ERR("Failed to send \"create trace chunk\" command reply (ret = %zd)", send_ret); ret = -1; } - end_no_reply: + lttng_trace_chunk_put(chunk); + lttng_trace_chunk_put(published_chunk); + lttng_directory_handle_fini(&session_output); return ret; } @@ -3075,17 +2919,9 @@ static int relay_process_control_command(struct relay_connection *conn, DBG_CMD("RELAYD_ROTATE_STREAM", conn); ret = relay_rotate_session_stream(header, conn, payload); break; - case RELAYD_ROTATE_RENAME: - DBG_CMD("RELAYD_ROTATE_RENAME", conn); - ret = relay_rotate_rename(header, conn, payload); - break; - case RELAYD_ROTATE_PENDING: - DBG_CMD("RELAYD_ROTATE_PENDING", conn); - ret = relay_rotate_pending(header, conn, payload); - break; - case RELAYD_MKDIR: - DBG_CMD("RELAYD_MKDIR", conn); - ret = relay_mkdir(header, conn, payload); + case RELAYD_CREATE_TRACE_CHUNK: + DBG_CMD("RELAYD_CREATE_TRACE_CHUNK", conn); + ret = relay_create_trace_chunk(header, conn, payload); break; case RELAYD_UPDATE_SYNC_INFO: default: @@ -3811,14 +3647,6 @@ restart: health_code_update(); - if (!revents) { - /* - * No activity for this FD (poll - * implementation). - */ - continue; - } - /* Thread quit pipe has been closed. Killing thread. */ ret = check_thread_quit_pipe(pollfd, revents); if (ret) { @@ -4130,6 +3958,13 @@ int main(int argc, char **argv) } } + sessiond_trace_chunk_registry = sessiond_trace_chunk_registry_create(); + if (!sessiond_trace_chunk_registry) { + ERR("Failed to initialize session daemon trace chunk registry"); + retval = -1; + goto exit_sessiond_trace_chunk_registry; + } + /* Initialize thread health monitoring */ health_relayd = health_app_create(NR_HEALTH_RELAYD_TYPES); if (!health_relayd) { @@ -4279,7 +4114,9 @@ exit_health_quit_pipe: exit_init_data: health_app_destroy(health_relayd); + sessiond_trace_chunk_registry_destroy(sessiond_trace_chunk_registry); exit_health_app_create: +exit_sessiond_trace_chunk_registry: exit_options: /* * Wait for all pending call_rcu work to complete before tearing