X-Git-Url: https://git.lttng.org/?a=blobdiff_plain;f=src%2Fbin%2Flttng-sessiond%2Fcmd.c;h=65a24e4dcdbd492e3abe14969a6f92be45e5486c;hb=a503e1ef71bfe98526469205fc2956cc65954019;hp=e88d7a2f77eb41534062fbe01dbe990d11f65855;hpb=90936dcf0968343f20b2f6fd365b9c015cdb9717;p=lttng-tools.git diff --git a/src/bin/lttng-sessiond/cmd.c b/src/bin/lttng-sessiond/cmd.c index e88d7a2f7..65a24e4dc 100644 --- a/src/bin/lttng-sessiond/cmd.c +++ b/src/bin/lttng-sessiond/cmd.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -47,7 +48,7 @@ #include "kernel-consumer.h" #include "lttng-sessiond.h" #include "utils.h" -#include "syscall.h" +#include "lttng-syscall.h" #include "agent.h" #include "buffer-registry.h" #include "notification-thread.h" @@ -55,9 +56,34 @@ #include "rotate.h" #include "rotation-thread.h" #include "sessiond-timer.h" +#include "agent-thread.h" #include "cmd.h" +/* Sleep for 100ms between each check for the shm path's deletion. */ +#define SESSION_DESTROY_SHM_PATH_CHECK_DELAY_US 100000 + +static enum lttng_error_code wait_on_path(void *path); + +/* + * Command completion handler that is used by the destroy command + * when a session that has a non-default shm_path is being destroyed. + * + * See comment in cmd_destroy_session() for the rationale. + */ +static struct destroy_completion_handler { + struct cmd_completion_handler handler; + char shm_path[member_sizeof(struct ltt_session, shm_path)]; +} destroy_completion_handler = { + .handler = { + .run = wait_on_path, + .data = destroy_completion_handler.shm_path + }, + .shm_path = { 0 }, +}; + +static struct cmd_completion_handler *current_completion_handler; + /* * Used to keep a unique index for each relayd socket created where this value * is associated with streams on the consumer so it can match the right relayd @@ -917,8 +943,8 @@ error: * * The consumer socket lock must be held by the caller. */ -static int send_consumer_relayd_socket(enum lttng_domain_type domain, - unsigned int session_id, struct lttng_uri *relayd_uri, +static int send_consumer_relayd_socket(unsigned int session_id, + struct lttng_uri *relayd_uri, struct consumer_output *consumer, struct consumer_socket *consumer_sock, char *session_name, char *hostname, int session_live_timer) @@ -1003,7 +1029,7 @@ static int send_consumer_relayd_sockets(enum lttng_domain_type domain, /* Sending control relayd socket. */ if (!sock->control_sock_sent) { - ret = send_consumer_relayd_socket(domain, session_id, + ret = send_consumer_relayd_socket(session_id, &consumer->dst.net.control, consumer, sock, session_name, hostname, session_live_timer); if (ret != LTTNG_OK) { @@ -1013,7 +1039,7 @@ static int send_consumer_relayd_sockets(enum lttng_domain_type domain, /* Sending data relayd socket. */ if (!sock->data_sock_sent) { - ret = send_consumer_relayd_socket(domain, session_id, + ret = send_consumer_relayd_socket(session_id, &consumer->dst.net.data, consumer, sock, session_name, hostname, session_live_timer); if (ret != LTTNG_OK) { @@ -1152,7 +1178,7 @@ static int start_kernel_session(struct ltt_kernel_session *ksess, int wpipe) } /* Quiescent wait after starting trace */ - kernel_wait_quiescent(kernel_tracer_fd); + kernel_wait_quiescent(wpipe); ksess->active = 1; @@ -1384,9 +1410,15 @@ int cmd_enable_channel(struct ltt_session *session, break; } case LTTNG_DOMAIN_UST: + break; case LTTNG_DOMAIN_JUL: case LTTNG_DOMAIN_LOG4J: case LTTNG_DOMAIN_PYTHON: + if (!agent_tracing_is_enabled()) { + DBG("Attempted to enable a channel in an agent domain but the agent thread is not running"); + ret = LTTNG_ERR_AGENT_TRACING_DISABLED; + goto error; + } break; default: ret = LTTNG_ERR_UNKNOWN_DOMAIN; @@ -2094,6 +2126,12 @@ static int _cmd_enable_event(struct ltt_session *session, assert(usess); + if (!agent_tracing_is_enabled()) { + DBG("Attempted to enable an event in an agent domain but the agent thread is not running"); + ret = LTTNG_ERR_AGENT_TRACING_DISABLED; + goto error; + } + agt = trace_ust_find_agent(usess, domain->type); if (!agt) { agt = agent_create(domain->type); @@ -2600,7 +2638,7 @@ int rename_active_chunk(struct ltt_session *session) { int ret; - session->rotate_count++; + session->current_archive_id++; /* * The currently active tracing path is now the folder we @@ -2633,7 +2671,7 @@ int rename_active_chunk(struct ltt_session *session) goto end; } end: - session->rotate_count--; + session->current_archive_id--; return ret; } @@ -2669,7 +2707,7 @@ int cmd_stop_trace(struct ltt_session *session) sessiond_rotate_timer_stop(session); } - if (session->rotate_count > 0 && !session->rotate_pending) { + if (session->current_archive_id > 0 && !session->rotate_pending) { ret = rename_active_chunk(session); if (ret) { /* @@ -3017,6 +3055,59 @@ int cmd_destroy_session(struct ltt_session *session, int wpipe, PERROR("write kernel poll pipe"); } + if (session->shm_path[0]) { + /* + * When a session is created with an explicit shm_path, + * the consumer daemon will create its shared memory files + * at that location and will *not* unlink them. This is normal + * as the intention of that feature is to make it possible + * to retrieve the content of those files should a crash occur. + * + * To ensure the content of those files can be used, the + * sessiond daemon will replicate the content of the metadata + * cache in a metadata file. + * + * On clean-up, it is expected that the consumer daemon will + * unlink the shared memory files and that the session daemon + * will unlink the metadata file. Then, the session's directory + * in the shm path can be removed. + * + * Unfortunately, a flaw in the design of the sessiond's and + * consumerd's tear down of channels makes it impossible to + * determine when the sessiond _and_ the consumerd have both + * destroyed their representation of a channel. For one, the + * unlinking, close, and rmdir happen in deferred 'call_rcu' + * callbacks in both daemons. + * + * However, it is also impossible for the sessiond to know when + * the consumer daemon is done destroying its channel(s) since + * it occurs as a reaction to the closing of the channel's file + * descriptor. There is no resulting communication initiated + * from the consumerd to the sessiond to confirm that the + * operation is completed (and was successful). + * + * Until this is all fixed, the session daemon checks for the + * removal of the session's shm path which makes it possible + * to safely advertise a session as having been destroyed. + * + * Prior to this fix, it was not possible to reliably save + * a session making use of the --shm-path option, destroy it, + * and load it again. This is because the creation of the + * session would fail upon seeing the session's shm path + * already in existence. + * + * Note that none of the error paths in the check for the + * directory's existence return an error. This is normal + * as there isn't much that can be done. The session will + * be destroyed properly, except that we can't offer the + * guarantee that the same session can be re-created. + */ + current_completion_handler = &destroy_completion_handler.handler; + ret = lttng_strncpy(destroy_completion_handler.shm_path, + session->shm_path, + sizeof(destroy_completion_handler.shm_path)); + assert(!ret); + } ret = session_destroy(session); return ret; @@ -3671,10 +3762,12 @@ static int clear_metadata_file(int fd) { int ret; + off_t lseek_ret; - ret = lseek(fd, 0, SEEK_SET); - if (ret < 0) { + lseek_ret = lseek(fd, 0, SEEK_SET); + if (lseek_ret < 0) { PERROR("lseek"); + ret = -1; goto end; } @@ -4406,7 +4499,7 @@ int cmd_rotate_session(struct ltt_session *session, if (session->consumer->type == CONSUMER_DST_NET && (session->consumer->relay_major_version == 2 && session->consumer->relay_minor_version < 11)) { - ret = -LTTNG_ERR_ROTATION_NOT_AVAILABLE; + ret = -LTTNG_ERR_ROTATION_NOT_AVAILABLE_RELAY; goto end; } @@ -4428,7 +4521,7 @@ int cmd_rotate_session(struct ltt_session *session, } /* Special case for the first rotation. */ - if (session->rotate_count == 0) { + if (session->current_archive_id == 0) { const char *base_path = NULL; /* Either one of the two sessions is enough to get the root path. */ @@ -4464,7 +4557,7 @@ int cmd_rotate_session(struct ltt_session *session, } DBG("Current rotate path %s", session->rotation_chunk.current_rotate_path); - session->rotate_count++; + session->current_archive_id++; session->rotate_pending = true; session->rotation_state = LTTNG_ROTATION_STATE_ONGOING; @@ -4501,7 +4594,7 @@ int cmd_rotate_session(struct ltt_session *session, sizeof(session->rotation_chunk.active_tracing_path), "%s/%s-%" PRIu64, session_get_base_path(session), - datetime, session->rotate_count + 1); + datetime, session->current_archive_id + 1); if (ret < 0 || ret == sizeof(session->rotation_chunk.active_tracing_path)) { ERR("Failed to format active kernel tracing path in rotate session command"); ret = -LTTNG_ERR_UNK; @@ -4514,7 +4607,7 @@ int cmd_rotate_session(struct ltt_session *session, ret = snprintf(session->kernel_session->consumer->chunk_path, sizeof(session->kernel_session->consumer->chunk_path), "/%s-%" PRIu64, datetime, - session->rotate_count + 1); + session->current_archive_id + 1); if (ret < 0 || ret == sizeof(session->kernel_session->consumer->chunk_path)) { ERR("Failed to format the kernel consumer's sub-directory in rotate session command"); ret = -LTTNG_ERR_UNK; @@ -4543,7 +4636,7 @@ int cmd_rotate_session(struct ltt_session *session, ret = snprintf(session->rotation_chunk.active_tracing_path, PATH_MAX, "%s/%s-%" PRIu64, session_get_base_path(session), - datetime, session->rotate_count + 1); + datetime, session->current_archive_id + 1); if (ret < 0) { ERR("Failed to format active UST tracing path in rotate session command"); ret = -LTTNG_ERR_UNK; @@ -4551,7 +4644,7 @@ int cmd_rotate_session(struct ltt_session *session, } ret = snprintf(session->ust_session->consumer->chunk_path, PATH_MAX, "/%s-%" PRIu64, datetime, - session->rotate_count + 1); + session->current_archive_id + 1); if (ret < 0) { ERR("Failed to format the UST consumer's sub-directory in rotate session command"); ret = -LTTNG_ERR_UNK; @@ -4564,9 +4657,12 @@ int cmd_rotate_session(struct ltt_session *session, ret = domain_mkdir(session->ust_session->consumer, session, session->ust_session->uid, session->ust_session->gid); + if (ret) { + ret = -LTTNG_ERR_CREATE_DIR_FAIL; + goto end; + } ret = ust_app_rotate_session(session, &ust_active); if (ret != LTTNG_OK) { - ret = -LTTNG_ERR_CREATE_DIR_FAIL; goto end; } /* @@ -4591,11 +4687,11 @@ int cmd_rotate_session(struct ltt_session *session, } if (rotate_return) { - rotate_return->rotation_id = session->rotate_count; + rotate_return->rotation_id = session->current_archive_id; } - DBG("Cmd rotate session %s, rotate_id %" PRIu64 " sent", session->name, - session->rotate_count); + DBG("Cmd rotate session %s, current_archive_id %" PRIu64 " sent", + session->name, session->current_archive_id); ret = LTTNG_OK; end: @@ -4618,9 +4714,9 @@ int cmd_rotate_get_info(struct ltt_session *session, assert(session); DBG("Cmd rotate_get_info session %s, rotation id %" PRIu64, session->name, - session->rotate_count); + session->current_archive_id); - if (session->rotate_count != rotation_id) { + if (session->current_archive_id != rotation_id) { info_return->status = (int32_t) LTTNG_ROTATION_STATE_EXPIRED; ret = LTTNG_OK; goto end; @@ -4632,16 +4728,59 @@ int cmd_rotate_get_info(struct ltt_session *session, rotation_id, session->name); break; case LTTNG_ROTATION_STATE_COMPLETED: - ret = lttng_strncpy(info_return->path, + { + char *current_tracing_path_reply; + size_t current_tracing_path_reply_len; + + switch (session_get_consumer_destination_type(session)) { + case CONSUMER_DST_LOCAL: + current_tracing_path_reply = + info_return->location.local.absolute_path; + current_tracing_path_reply_len = + sizeof(info_return->location.local.absolute_path); + info_return->location_type = + (int8_t) LTTNG_TRACE_ARCHIVE_LOCATION_TYPE_LOCAL; + break; + case CONSUMER_DST_NET: + current_tracing_path_reply = + info_return->location.relay.relative_path; + current_tracing_path_reply_len = + sizeof(info_return->location.relay.relative_path); + /* Currently the only supported relay protocol. */ + info_return->location.relay.protocol = + (int8_t) LTTNG_TRACE_ARCHIVE_LOCATION_RELAY_PROTOCOL_TYPE_TCP; + + ret = lttng_strncpy(info_return->location.relay.host, + session_get_net_consumer_hostname(session), + sizeof(info_return->location.relay.host)); + if (ret) { + ERR("Failed to host name to rotate_get_info reply"); + info_return->status = LTTNG_ROTATION_STATUS_ERROR; + ret = -LTTNG_ERR_UNK; + goto end; + } + + session_get_net_consumer_ports(session, + &info_return->location.relay.ports.control, + &info_return->location.relay.ports.data); + info_return->location_type = + (int8_t) LTTNG_TRACE_ARCHIVE_LOCATION_TYPE_RELAY; + break; + default: + abort(); + } + ret = lttng_strncpy(current_tracing_path_reply, session->rotation_chunk.current_rotate_path, - sizeof(info_return->path)); + current_tracing_path_reply_len); if (ret) { - ERR("Failed to copy active tracing path to rotate_get_info reply"); + ERR("Failed to copy current tracing path to rotate_get_info reply"); info_return->status = LTTNG_ROTATION_STATUS_ERROR; ret = -LTTNG_ERR_UNK; goto end; } + break; + } case LTTNG_ROTATION_STATE_ERROR: DBG("Reporting that an error occurred during rotation %" PRIu64 " of session %s", rotation_id, session->name); @@ -4766,7 +4905,7 @@ int cmd_session_get_current_output(struct ltt_session *session, const char *path; if (!session->snapshot_mode) { - if (session->rotate_count == 0) { + if (session->current_archive_id == 0) { if (session->kernel_session) { path = session_get_base_path(session); } else if (session->ust_session) { @@ -4803,6 +4942,49 @@ end: return ret; } +/* Wait for a given path to be removed before continuing. */ +static enum lttng_error_code wait_on_path(void *path_data) +{ + const char *shm_path = path_data; + + DBG("Waiting for the shm path at %s to be removed before completing session destruction", + shm_path); + while (true) { + int ret; + struct stat st; + + ret = stat(shm_path, &st); + if (ret) { + if (errno != ENOENT) { + PERROR("stat() returned an error while checking for the existence of the shm path"); + } else { + DBG("shm path no longer exists, completing the destruction of session"); + } + break; + } else { + if (!S_ISDIR(st.st_mode)) { + ERR("The type of shm path %s returned by stat() is not a directory; aborting the wait for shm path removal", + shm_path); + break; + } + } + usleep(SESSION_DESTROY_SHM_PATH_CHECK_DELAY_US); + } + return LTTNG_OK; +} + +/* + * Returns a pointer to a handler to run on completion of a command. + * Returns NULL if no handler has to be run for the last command executed. + */ +const struct cmd_completion_handler *cmd_pop_completion_handler(void) +{ + struct cmd_completion_handler *handler = current_completion_handler; + + current_completion_handler = NULL; + return handler; +} + /* * Init command subsystem. */