From 5c408ad8ef08a226c018702aca969536f36ac4e5 Mon Sep 17 00:00:00 2001 From: Julien Desfossez Date: Mon, 18 Dec 2017 14:45:56 -0500 Subject: [PATCH] Rotate command MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This is the core of the session rotation command in the session daemon, no client interface for now. For each channel in the session, we send a request to the related consumer to rotate the channel and add that channel key and domain in the channel_pending_rotate_ht HT. When the consumer has finished the rotation of all the streams in the channel, it sends back a notification. The rotation thread in the session daemon looks up the channel information in the HT and finds the corresponding session. When all channels of a session have finished, the rotation thread asks the consumer to rename the chunk folder to append the timestamp of the end of the rotation. On the first rotation, we have an extra step to change the session directory layout from "<session-path>/<domain(s)>" to "<session-path>/<start-date>-<end-date>-1/<domain(s)>". When the rotation starts, the new chunk folder is created immediately in: "<session-path>/<start-date>-2/<domain(s)>" so we won't have to move the domain folder(s) after the next rotate has finished, just rename the chunk folder. The "mkdir" and "rename" commands are all propagated to the relay if needed, only the rotate_pending check on the relay is not part of this patch. 
Signed-off-by: Julien Desfossez Signed-off-by: Jérémie Galarneau --- include/lttng/lttng-error.h | 6 + include/lttng/rotate-internal.h | 39 ++ src/bin/lttng-sessiond/cmd.c | 444 ++++++++++++++++++++++- src/bin/lttng-sessiond/cmd.h | 3 + src/bin/lttng-sessiond/consumer.c | 66 ++++ src/bin/lttng-sessiond/consumer.h | 4 + src/bin/lttng-sessiond/kernel.c | 86 +++++ src/bin/lttng-sessiond/kernel.h | 1 + src/bin/lttng-sessiond/main.c | 92 +++++ src/bin/lttng-sessiond/rotate.c | 32 ++ src/bin/lttng-sessiond/rotate.h | 10 + src/bin/lttng-sessiond/rotation-thread.c | 1 + src/bin/lttng-sessiond/session.c | 5 +- src/bin/lttng-sessiond/session.h | 12 + src/bin/lttng-sessiond/ust-app.c | 193 ++++++++++ src/bin/lttng-sessiond/ust-app.h | 8 + src/common/consumer/consumer.h | 3 + src/common/error.c | 6 + src/common/sessiond-comm/sessiond-comm.h | 1 + tests/unit/test_ust_data.c | 13 + 20 files changed, 1023 insertions(+), 2 deletions(-) create mode 100644 include/lttng/rotate-internal.h diff --git a/include/lttng/lttng-error.h b/include/lttng/lttng-error.h index 1b5ea699a..c07bd57b2 100644 --- a/include/lttng/lttng-error.h +++ b/include/lttng/lttng-error.h @@ -149,6 +149,12 @@ enum lttng_error_code { LTTNG_ERR_TRIGGER_EXISTS = 126, /* Trigger already registered. */ LTTNG_ERR_TRIGGER_NOT_FOUND = 127, /* Trigger not found. */ LTTNG_ERR_COMMAND_CANCELLED = 128, /* Command cancelled. */ + LTTNG_ERR_ROTATION_PENDING = 129, /* Rotate already pending for this session. */ + LTTNG_ERR_ROTATION_NOT_AVAILABLE = 130, /* Rotate feature not available for this type of session (e.g: live) */ + LTTNG_ERR_ROTATION_TIMER_IS_SET = 131, /* Rotate timer already setup for this session. */ + LTTNG_ERR_ROTATION_SIZE_IS_SET = 132, /* Rotate size already setup for this session. */ + LTTNG_ERR_ROTATION_MULTIPLE_AFTER_STOP = 133, /* Already rotated once after a stop. 
*/ + LTTNG_ERR_ROTATION_WRONG_VERSION = 134, /* Rotate not supported by this kernel tracer version */ /* MUST be last element */ LTTNG_ERR_NR, /* Last element */ diff --git a/include/lttng/rotate-internal.h b/include/lttng/rotate-internal.h new file mode 100644 index 000000000..908422e19 --- /dev/null +++ b/include/lttng/rotate-internal.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2017 - Julien Desfossez + * + * This library is free software; you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License, version 2.1 only, + * as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef LTTNG_ROTATE_INTERNAL_ABI_H +#define LTTNG_ROTATE_INTERNAL_ABI_H + +#include +#include + +#include +#include +#include + +/* + * Internal objects between lttng-ctl and the session daemon, the values + * are then copied to the user's lttng_rotate_session_handle object. + */ +/* For the LTTNG_ROTATE_SESSION command. */ +struct lttng_rotate_session_return { + uint64_t rotate_id; + /* Represents values defined in enum lttng_rotation_status. 
*/ + int32_t status; +} LTTNG_PACKED; + +#endif /* LTTNG_ROTATE_INTERNAL_ABI_H */ diff --git a/src/bin/lttng-sessiond/cmd.c b/src/bin/lttng-sessiond/cmd.c index 1542fe8e1..606811e18 100644 --- a/src/bin/lttng-sessiond/cmd.c +++ b/src/bin/lttng-sessiond/cmd.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include "channel.h" @@ -51,6 +52,9 @@ #include "buffer-registry.h" #include "notification-thread.h" #include "notification-thread-commands.h" +#include "rotate.h" +#include "rotation-thread.h" +#include "sessiond-timer.h" #include "cmd.h" @@ -2519,7 +2523,18 @@ int cmd_start_trace(struct ltt_session *session) goto error; } + /* + * Record the timestamp of the first time the session is started for + * an eventual session rotation call. + */ if (!session->has_been_started) { + session->current_chunk_start_ts = time(NULL); + if (session->current_chunk_start_ts == (time_t) -1) { + PERROR("Failed to retrieve the \"%s\" session's start time", + session->name); + ret = LTTNG_ERR_FATAL; + goto error; + } if (!session->snapshot_mode && session->output_traces) { ret = session_mkdir(session); if (ret) { @@ -2558,12 +2573,60 @@ int cmd_start_trace(struct ltt_session *session) session->has_been_started = 1; session->active = 1; + /* + * Clear the flag that indicates that a rotation was done while the + * session was stopped. + */ + session->rotated_after_last_stop = false; + ret = LTTNG_OK; error: return ret; } +static +int rename_active_chunk(struct ltt_session *session) +{ + int ret; + + session->rotate_count++; + + /* + * The currently active tracing path is now the folder we + * want to rename. 
+ */ + ret = lttng_strncpy(session->rotation_chunk.current_rotate_path, + session->rotation_chunk.active_tracing_path, + sizeof(session->rotation_chunk.current_rotate_path)); + if (ret) { + ERR("Failed to copy active tracing path"); + goto end; + } + + ret = rename_complete_chunk(session, time(NULL)); + if (ret < 0) { + ERR("Failed to rename current rotate path"); + goto end; + } + + /* + * We just renamed, the folder, we didn't do an actual rotation, so + * the active tracing path is now the renamed folder and we have to + * restore the rotate count. + */ + ret = lttng_strncpy(session->rotation_chunk.active_tracing_path, + session->rotation_chunk.current_rotate_path, + sizeof(session->rotation_chunk.active_tracing_path)); + if (ret) { + ERR("Failed to rename active session chunk tracing path"); + goto end; + } +end: + session->rotate_count--; + return ret; +} + /* * Command LTTNG_STOP_TRACE processed by the client thread. */ @@ -2573,9 +2636,11 @@ int cmd_stop_trace(struct ltt_session *session) struct ltt_kernel_channel *kchan; struct ltt_kernel_session *ksession; struct ltt_ust_session *usess; + bool error_occured = false; assert(session); + DBG("Begin stop session %s (id %" PRIu64 ")", session->name, session->id); /* Short cut */ ksession = session->kernel_session; usess = session->ust_session; @@ -2586,6 +2651,17 @@ int cmd_stop_trace(struct ltt_session *session) goto error; } + if (session->rotate_count > 0 && !session->rotate_pending) { + ret = rename_active_chunk(session); + if (ret) { + /* + * This error should not prevent the user from stopping + * the session. However, it will be reported at the end. 
+ */ + error_occured = true; + } + } + /* Kernel tracer */ if (ksession && ksession->active) { DBG("Stop kernel tracing"); @@ -2615,6 +2691,8 @@ int cmd_stop_trace(struct ltt_session *session) } ksession->active = 0; + DBG("Kernel session stopped %s (id %" PRIu64 ")", session->name, + session->id); } if (usess && usess->active) { @@ -2633,7 +2711,7 @@ int cmd_stop_trace(struct ltt_session *session) /* Flag inactive after a successful stop. */ session->active = 0; - ret = LTTNG_OK; + ret = !error_occured ? LTTNG_OK : LTTNG_ERR_UNK; error: return ret; @@ -2869,6 +2947,17 @@ int cmd_destroy_session(struct ltt_session *session, int wpipe) usess = session->ust_session; ksess = session->kernel_session; + DBG("Begin destroy session %s (id %" PRIu64 ")", session->name, session->id); + + /* + * The rename of the current chunk is performed at stop, but if we rotated + * the session after the previous stop command, we need to rename the + * new (and empty) chunk that was started in between. + */ + if (session->rotated_after_last_stop) { + rename_active_chunk(session); + } + /* Clean kernel session teardown */ kernel_destroy_session(ksess); @@ -3246,6 +3335,8 @@ int cmd_data_pending(struct ltt_session *session) assert(session); + DBG("Data pending for session %s", session->name); + /* Session MUST be stopped to ask for data availability. */ if (session->active) { ret = LTTNG_ERR_SESSION_STARTED; @@ -3267,6 +3358,15 @@ int cmd_data_pending(struct ltt_session *session) } } + /* + * A rotation is still pending, we have to wait. + */ + if (session->rotate_pending) { + DBG("Rotate still pending for session %s", session->name); + ret = 1; + goto error; + } + if (ksess && ksess->consumer) { ret = consumer_is_data_pending(ksess->id, ksess->consumer); if (ret == 1) { @@ -4237,6 +4337,348 @@ int cmd_set_session_shm_path(struct ltt_session *session, return 0; } +/* + * Command LTTNG_ROTATE_SESSION from the lttng-ctl library. 
+ * + * Ask the consumer to rotate the session output directory. + * The session lock must be held. + * + * Return LTTNG_OK on success or else a LTTNG_ERR code. + */ +int cmd_rotate_session(struct ltt_session *session, + struct lttng_rotate_session_return *rotate_return) +{ + int ret; + size_t strf_ret; + struct tm *timeinfo; + char datetime[16]; + time_t now; + bool ust_active = false; + + assert(session); + + if (!session->has_been_started) { + ret = -LTTNG_ERR_START_SESSION_ONCE; + goto error; + } + + if (session->live_timer || session->snapshot_mode || + !session->output_traces) { + ret = -LTTNG_ERR_ROTATION_NOT_AVAILABLE; + goto error; + } + + /* + * Unsupported feature in lttng-relayd before 2.11. + */ + if (session->consumer->type == CONSUMER_DST_NET && + (session->consumer->relay_major_version == 2 && + session->consumer->relay_minor_version < 11)) { + ret = -LTTNG_ERR_ROTATION_NOT_AVAILABLE; + goto error; + } + + if (session->rotate_pending || session->rotate_pending_relay) { + ret = -LTTNG_ERR_ROTATION_PENDING; + DBG("Rotate already in progress"); + goto error; + } + + /* + * After a stop, we only allow one rotation to occur, the other ones are + * useless until a new start. + */ + if (session->rotated_after_last_stop) { + DBG("Session \"%s\" was already rotated after stop, refusing rotation", + session->name); + ret = -LTTNG_ERR_ROTATION_MULTIPLE_AFTER_STOP; + goto error; + } + + /* Special case for the first rotation. */ + if (session->rotate_count == 0) { + const char *base_path = NULL; + + /* Either one of the two sessions is enough to get the root path. 
*/ + if (session->kernel_session) { + base_path = session_get_base_path(session); + } else if (session->ust_session) { + base_path = session_get_base_path(session); + } else { + assert(0); + } + assert(base_path); + ret = lttng_strncpy(session->rotation_chunk.current_rotate_path, + base_path, + sizeof(session->rotation_chunk.current_rotate_path)); + if (ret) { + ERR("Failed to copy session base path to current rotation chunk path"); + ret = -LTTNG_ERR_UNK; + goto error; + } + } else { + /* + * The currently active tracing path is now the folder we + * want to rotate. + */ + ret = lttng_strncpy(session->rotation_chunk.current_rotate_path, + session->rotation_chunk.active_tracing_path, + sizeof(session->rotation_chunk.current_rotate_path)); + if (ret) { + ERR("Failed to copy the active tracing path to the current rotate path"); + ret = -LTTNG_ERR_UNK; + goto error; + } + } + DBG("Current rotate path %s", session->rotation_chunk.current_rotate_path); + + session->rotate_count++; + session->rotate_pending = true; + session->rotation_status = LTTNG_ROTATION_STATUS_STARTED; + + /* + * Create the path name for the next chunk. + */ + now = time(NULL); + if (now == (time_t) -1) { + ret = -LTTNG_ERR_ROTATION_NOT_AVAILABLE; + goto error; + } + session->last_chunk_start_ts = session->current_chunk_start_ts; + session->current_chunk_start_ts = now; + + timeinfo = localtime(&now); + if (!timeinfo) { + PERROR("Failed to sample local time in rotate session command"); + ret = -LTTNG_ERR_UNK; + goto error; + } + strf_ret = strftime(datetime, sizeof(datetime), "%Y%m%d-%H%M%S", + timeinfo); + if (!strf_ret) { + ERR("Failed to format local time timestamp in rotate session command"); + ret = -LTTNG_ERR_UNK; + goto error; + } + if (session->kernel_session) { + /* + * The active path for the next rotation/destroy. 
+ * Ex: ~/lttng-traces/auto-20170922-111748/20170922-111754-42 + */ + ret = snprintf(session->rotation_chunk.active_tracing_path, + sizeof(session->rotation_chunk.active_tracing_path), + "%s/%s-%" PRIu64, + session_get_base_path(session), + datetime, session->rotate_count + 1); + if (ret < 0 || (size_t) ret >= sizeof(session->rotation_chunk.active_tracing_path)) { /* snprintf() returns the untruncated length: >= size means the path was cut */ + ERR("Failed to format active kernel tracing path in rotate session command"); + ret = -LTTNG_ERR_UNK; + goto error; + } + /* + * The sub-directory for the consumer + * Ex: /20170922-111754-42/kernel + */ + ret = snprintf(session->kernel_session->consumer->chunk_path, + sizeof(session->kernel_session->consumer->chunk_path), + "/%s-%" PRIu64, datetime, + session->rotate_count + 1); + if (ret < 0 || (size_t) ret >= sizeof(session->kernel_session->consumer->chunk_path)) { /* same truncation rule as above */ + ERR("Failed to format the kernel consumer's sub-directory in rotate session command"); + ret = -LTTNG_ERR_UNK; + goto error; + } + /* + * Create the new chunk folder, before the rotation begins so we don't + * race with the consumer/tracer activity. 
+ */ + ret = domain_mkdir(session->kernel_session->consumer, session, + session->kernel_session->uid, + session->kernel_session->gid); + if (ret) { + ERR("Failed to create kernel session tracing path at %s", + session->kernel_session->chunk_path); + goto error; + } + ret = kernel_rotate_session(session); + if (ret != LTTNG_OK) { + goto error; + } + } + if (session->ust_session) { + ret = snprintf(session->rotation_chunk.active_tracing_path, + sizeof(session->rotation_chunk.active_tracing_path), "%s/%s-%" PRIu64, + session_get_base_path(session), + datetime, session->rotate_count + 1); + if (ret < 0 || (size_t) ret >= sizeof(session->rotation_chunk.active_tracing_path)) { /* snprintf() returns the untruncated length: >= size means the path was cut */ + ERR("Failed to format active UST tracing path in rotate session command"); + ret = -LTTNG_ERR_UNK; + goto error; + } + ret = snprintf(session->ust_session->consumer->chunk_path, + sizeof(session->ust_session->consumer->chunk_path), "/%s-%" PRIu64, datetime, + session->rotate_count + 1); + if (ret < 0 || (size_t) ret >= sizeof(session->ust_session->consumer->chunk_path)) { /* same truncation rule as above */ + ERR("Failed to format the UST consumer's sub-directory in rotate session command"); + ret = -LTTNG_ERR_UNK; + goto error; + } + /* Create the new chunk folder before the rotation begins so we don't race with the consumer/tracer activity; do not rotate if it cannot be created. */ + ret = domain_mkdir(session->ust_session->consumer, session, + session->ust_session->uid, + session->ust_session->gid); + if (ret) { + goto error; + } + ret = ust_app_rotate_session(session, &ust_active); + if (ret != LTTNG_OK) { + goto error; + } + /* + * Handle the case where we did not start a rotation on any channel. + * The consumer will never wake up the rotation thread to perform the + * rename, so we have to do it here while we hold the session and + * session_list locks. 
+ */ + if (!session->kernel_session && !ust_active) { + ret = rename_complete_chunk(session, now); + if (ret < 0) { + ERR("Failed to rename completed rotation chunk"); + goto end; + } + session->rotate_pending = false; + session->rotation_status = LTTNG_ROTATION_STATUS_COMPLETED; + } + } + + if (!session->active) { + session->rotated_after_last_stop = true; + } + + if (rotate_return) { + rotate_return->rotate_id = session->rotate_count; /* rotate_return is a plain pointer to the caller's reply struct */ + rotate_return->status = LTTNG_ROTATION_STATUS_STARTED; + } + + + DBG("Cmd rotate session %s, rotate_id %" PRIu64 " completed", session->name, + session->rotate_count); + ret = LTTNG_OK; + + goto end; + +error: + if (rotate_return) { + rotate_return->status = LTTNG_ROTATION_STATUS_ERROR; + } +end: + return ret; +} + +/* + * Command LTTNG_ROTATE_PENDING from the lttng-ctl library. + * + * Check if the session has finished its rotation. + * + * Return LTTNG_OK on success or a negative value on error. + */ +int cmd_rotate_pending(struct ltt_session *session, + struct lttng_rotate_pending_return **pending_return, + uint64_t rotate_id) +{ + int ret; + + assert(session); + + DBG("Cmd rotate pending session %s, rotate_id %" PRIu64, session->name, + session->rotate_count); + + *pending_return = zmalloc(sizeof(struct lttng_rotate_pending_return)); + if (!*pending_return) { + ret = -ENOMEM; + goto end; + } + + if (session->rotate_count != rotate_id) { + (*pending_return)->status = LTTNG_ROTATION_STATUS_EXPIRED; + ret = LTTNG_OK; + goto end; + } + + if (session->rotation_status == LTTNG_ROTATION_STATUS_ERROR) { + DBG("An error occurred during rotation"); + (*pending_return)->status = LTTNG_ROTATION_STATUS_ERROR; + /* Rotate with a relay */ + } else if (session->rotate_pending_relay) { + DBG("Session %s, rotate_id %" PRIu64 " still pending", + session->name, session->rotate_count); + (*pending_return)->status = LTTNG_ROTATION_STATUS_STARTED; + } else if (session->rotate_pending) { + DBG("Session %s, rotate_id %" PRIu64 " still pending", + 
session->name, session->rotate_count); + (*pending_return)->status = LTTNG_ROTATION_STATUS_STARTED; + } else { + DBG("Session %s, rotate_id %" PRIu64 " finished", + session->name, session->rotate_count); + (*pending_return)->status = LTTNG_ROTATION_STATUS_COMPLETED; + ret = lttng_strncpy((*pending_return)->output_path, + session->rotation_chunk.current_rotate_path, + sizeof((*pending_return)->output_path)); + if (ret) { + ERR("Failed to copy active tracing path to rotate pending command reply"); + (*pending_return)->status = LTTNG_ROTATION_STATUS_ERROR; + ret = -1; + goto end; + } + } + + ret = LTTNG_OK; + + goto end; + +end: + return ret; +} + +/* + * Command ROTATE_GET_CURRENT_PATH from the lttng-ctl library. + * + * Allocate *get_return and report the session's rotation status along with + * its current rotate path (the path is only set after a first rotation). + * + * Return LTTNG_OK on success or a negative value on error. + */ +int cmd_rotate_get_current_path(struct ltt_session *session, + struct lttng_rotate_get_current_path **get_return) +{ + int ret; + + *get_return = zmalloc(sizeof(struct lttng_rotate_get_current_path)); + if (!*get_return) { + ret = -ENOMEM; + goto end; + } + + if (session->rotate_count == 0) { + (*get_return)->status = LTTNG_ROTATION_STATUS_NO_ROTATION; + } else { + (*get_return)->status = session->rotation_status; + ret = lttng_strncpy((*get_return)->output_path, + session->rotation_chunk.current_rotate_path, + sizeof((*get_return)->output_path)); + if (ret) { + ERR("Failed to copy trace output path to rotate get current path command reply"); + ret = -1; + goto end; + } + } + + ret = LTTNG_OK; + +end: + return ret; +} + /* * Init command subsystem. 
*/ diff --git a/src/bin/lttng-sessiond/cmd.h b/src/bin/lttng-sessiond/cmd.h index e7e344276..685c2f3b0 100644 --- a/src/bin/lttng-sessiond/cmd.h +++ b/src/bin/lttng-sessiond/cmd.h @@ -118,4 +118,7 @@ int cmd_register_trigger(struct command_ctx *cmd_ctx, int sock, int cmd_unregister_trigger(struct command_ctx *cmd_ctx, int sock, struct notification_thread_handle *notification_thread_handle); +int cmd_rotate_session(struct ltt_session *session, + struct lttng_rotate_session_return *rotate_return); + #endif /* CMD_H */ diff --git a/src/bin/lttng-sessiond/consumer.c b/src/bin/lttng-sessiond/consumer.c index 35d1b8aa2..a226b5726 100644 --- a/src/bin/lttng-sessiond/consumer.c +++ b/src/bin/lttng-sessiond/consumer.c @@ -1613,6 +1613,72 @@ end: return ret; } +/* + * Ask the consumer to rotate a channel. + * domain_path contains "/kernel" for kernel or the complete path for UST + * (ex: /ust/uid/1000/64-bit); + * + * The new_chunk_id is the session->rotate_count that has been incremented + * when the rotation started. On the relay, this allows to keep track in which + * chunk each stream is currently writing to (for the rotate_pending operation). 
+ */ +int consumer_rotate_channel(struct consumer_socket *socket, uint64_t key, + uid_t uid, gid_t gid, struct consumer_output *output, + char *domain_path, bool is_metadata_channel, + uint64_t new_chunk_id, + bool *rotate_pending_relay) +{ + int ret; + struct lttcomm_consumer_msg msg; + + assert(socket); + + DBG("Consumer rotate channel key %" PRIu64, key); + + pthread_mutex_lock(socket->lock); + memset(&msg, 0, sizeof(msg)); + msg.cmd_type = LTTNG_CONSUMER_ROTATE_CHANNEL; + msg.u.rotate_channel.key = key; + msg.u.rotate_channel.metadata = !!is_metadata_channel; + msg.u.rotate_channel.new_chunk_id = new_chunk_id; + + if (output->type == CONSUMER_DST_NET) { + msg.u.rotate_channel.relayd_id = output->net_seq_index; + ret = snprintf(msg.u.rotate_channel.pathname, + sizeof(msg.u.rotate_channel.pathname), "%s%s%s", + output->dst.net.base_dir, + output->chunk_path, domain_path); + if (ret < 0 || (size_t) ret >= sizeof(msg.u.rotate_channel.pathname)) { /* snprintf() returns the untruncated length: >= size means the path was cut */ + ERR("Failed to format channel path name when asking consumer to rotate channel"); + ret = -1; + goto error; + } + *rotate_pending_relay = true; + } else { + msg.u.rotate_channel.relayd_id = (uint64_t) -1ULL; + ret = snprintf(msg.u.rotate_channel.pathname, + sizeof(msg.u.rotate_channel.pathname), "%s%s%s", + output->dst.session_root_path, + output->chunk_path, domain_path); + if (ret < 0 || (size_t) ret >= sizeof(msg.u.rotate_channel.pathname)) { /* same truncation rule as above */ + ERR("Failed to format channel path name when asking consumer to rotate channel"); + ret = -1; + goto error; + } + } + + health_code_update(); + ret = consumer_send_msg(socket, &msg); + if (ret < 0) { + goto error; + } + +error: + pthread_mutex_unlock(socket->lock); + health_code_update(); + return ret; +} + int consumer_rotate_rename(struct consumer_socket *socket, uint64_t session_id, const struct consumer_output *output, const char *old_path, const char *new_path, uid_t uid, gid_t gid) diff --git a/src/bin/lttng-sessiond/consumer.h b/src/bin/lttng-sessiond/consumer.h index 36e7c83d5..fbd5b1152 
100644 --- a/src/bin/lttng-sessiond/consumer.h +++ b/src/bin/lttng-sessiond/consumer.h @@ -322,6 +322,10 @@ int consumer_snapshot_channel(struct consumer_socket *socket, uint64_t key, struct snapshot_output *output, int metadata, uid_t uid, gid_t gid, const char *session_path, int wait, uint64_t nb_packets_per_stream); +int consumer_rotate_channel(struct consumer_socket *socket, uint64_t key, + uid_t uid, gid_t gid, struct consumer_output *output, + char *domain_path, bool is_metadata_channel, uint64_t new_chunk_id, + bool *rotate_pending_relay); int consumer_rotate_rename(struct consumer_socket *socket, uint64_t session_id, const struct consumer_output *output, const char *old_path, const char *new_path, uid_t uid, gid_t gid); diff --git a/src/bin/lttng-sessiond/kernel.c b/src/bin/lttng-sessiond/kernel.c index e5f068a0c..ccf315d89 100644 --- a/src/bin/lttng-sessiond/kernel.c +++ b/src/bin/lttng-sessiond/kernel.c @@ -33,6 +33,7 @@ #include "kernel-consumer.h" #include "kern-modules.h" #include "utils.h" +#include "rotate.h" /* * Key used to reference a channel between the sessiond and the consumer. This @@ -1133,3 +1134,88 @@ int kernel_supports_ring_buffer_snapshot_sample_positions(int tracer_fd) error: return ret; } + +/* + * Rotate a kernel session. + * + * Return 0 on success or else return a LTTNG_ERR code. + */ +int kernel_rotate_session(struct ltt_session *session) +{ + int ret; + struct consumer_socket *socket; + struct lttng_ht_iter iter; + struct ltt_kernel_session *ksess = session->kernel_session; + + assert(ksess); + assert(ksess->consumer); + + DBG("Rotate kernel session %s started (session %" PRIu64 ")", + session->name, session->id); + + rcu_read_lock(); + + /* + * Note that this loop will end after one iteration given that there is + * only one kernel consumer. 
+ */ + cds_lfht_for_each_entry(ksess->consumer->socks->ht, &iter.iter, + socket, node.node) { + struct ltt_kernel_channel *chan; + + /* + * Account the metadata channel first to make sure the + * number of channels waiting for a rotation cannot + * reach 0 before we complete the iteration over all + * the channels. + */ + ret = rotate_add_channel_pending(ksess->metadata->fd, + LTTNG_DOMAIN_KERNEL, session); + if (ret < 0) { + ret = LTTNG_ERR_KERN_CONSUMER_FAIL; + goto error; + } + + /* For each channel, ask the consumer to rotate it. */ + cds_list_for_each_entry(chan, &ksess->channel_list.head, list) { + ret = rotate_add_channel_pending(chan->key, + LTTNG_DOMAIN_KERNEL, session); + if (ret < 0) { + ret = LTTNG_ERR_KERN_CONSUMER_FAIL; + goto error; + } + + DBG("Rotate channel %" PRIu64 ", session %s", chan->key, session->name); + ret = consumer_rotate_channel(socket, chan->key, + ksess->uid, ksess->gid, ksess->consumer, + ksess->consumer->subdir, + /* is_metadata_channel */ false, + session->rotate_count, + &session->rotate_pending_relay); + if (ret < 0) { + ret = LTTNG_ERR_KERN_CONSUMER_FAIL; + goto error; + } + } + + /* + * Rotate the metadata channel. 
+ */ + ret = consumer_rotate_channel(socket, ksess->metadata->fd, + ksess->uid, ksess->gid, ksess->consumer, + ksess->consumer->subdir, + /* is_metadata_channel */ true, + session->rotate_count, + &session->rotate_pending_relay); + if (ret < 0) { + ret = LTTNG_ERR_KERN_CONSUMER_FAIL; + goto error; + } + } + + ret = LTTNG_OK; + +error: + rcu_read_unlock(); + return ret; +} diff --git a/src/bin/lttng-sessiond/kernel.h b/src/bin/lttng-sessiond/kernel.h index 17aede1ed..8d5ddb668 100644 --- a/src/bin/lttng-sessiond/kernel.h +++ b/src/bin/lttng-sessiond/kernel.h @@ -63,6 +63,7 @@ int kernel_snapshot_record(struct ltt_kernel_session *ksess, struct snapshot_output *output, int wait, uint64_t nb_packets_per_stream); int kernel_syscall_mask(int chan_fd, char **syscall_mask, uint32_t *nr_bits); +int kernel_rotate_session(struct ltt_session *session); int init_kernel_workarounds(void); ssize_t kernel_list_tracker_pids(struct ltt_kernel_session *session, diff --git a/src/bin/lttng-sessiond/main.c b/src/bin/lttng-sessiond/main.c index 489cc9d76..1d9ca91c5 100644 --- a/src/bin/lttng-sessiond/main.c +++ b/src/bin/lttng-sessiond/main.c @@ -2903,6 +2903,22 @@ static unsigned int lttng_sessions_count(uid_t uid, gid_t gid) return i; } +/* + * Check if the current kernel tracer supports the session rotation feature. + * Return 1 if it does, 0 otherwise. + */ +static int check_rotate_compatible(void) +{ + int ret = 1; + + if (kernel_tracer_version.major != 2 || kernel_tracer_version.minor < 11) { + DBG("Kernel tracer version is not compatible with the rotation feature"); + ret = 0; + } + + return ret; +} + /* * Process the command requested by the lttng client within the command * context structure. 
This function make sure that the return structure (llm) @@ -2947,6 +2963,9 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock, case LTTNG_REGENERATE_STATEDUMP: case LTTNG_REGISTER_TRIGGER: case LTTNG_UNREGISTER_TRIGGER: + case LTTNG_ROTATE_SESSION: + case LTTNG_ROTATE_PENDING: + case LTTNG_ROTATE_GET_CURRENT_PATH: need_domain = 0; break; default: @@ -2989,6 +3008,8 @@ static int process_client_msg(struct command_ctx *cmd_ctx, int sock, case LTTNG_LIST_SYSCALLS: case LTTNG_LIST_TRACKER_PIDS: case LTTNG_DATA_PENDING: + case LTTNG_ROTATE_SESSION: + case LTTNG_ROTATE_PENDING: break; default: /* Setup lttng message with no payload */ @@ -4077,6 +4098,77 @@ error_add_context: notification_thread_handle); break; } + case LTTNG_ROTATE_SESSION: + { + struct lttng_rotate_session_return rotate_return = { 0 }; /* zeroed: the reply is also sent when the command fails with a positive code, which only sets .status */ + + DBG("Client rotate session \"%s\"", cmd_ctx->session->name); + + if (cmd_ctx->session->kernel_session && !check_rotate_compatible()) { + DBG("Kernel tracer version is not compatible with the rotation feature"); + ret = LTTNG_ERR_ROTATION_WRONG_VERSION; + goto error; + } + + ret = cmd_rotate_session(cmd_ctx->session, &rotate_return); + if (ret < 0) { + ret = -ret; + goto error; + } + + ret = setup_lttng_msg_no_cmd_header(cmd_ctx, &rotate_return, + sizeof(rotate_return)); + if (ret < 0) { + ret = -ret; + goto error; + } + + ret = LTTNG_OK; + break; + } + case LTTNG_ROTATE_PENDING: + { + struct lttng_rotate_pending_return *pending_return = NULL; + + ret = cmd_rotate_pending(cmd_ctx->session, &pending_return, + cmd_ctx->lsm->u.rotate_pending.rotate_id); + if (ret < 0) { + ret = -ret; + goto error; + } + + ret = setup_lttng_msg_no_cmd_header(cmd_ctx, pending_return, + sizeof(*pending_return)); /* payload size must match the buffer allocated by cmd_rotate_pending() */ + free(pending_return); + if (ret < 0) { + ret = -ret; + goto error; + } + + ret = LTTNG_OK; + break; + } + case LTTNG_ROTATE_GET_CURRENT_PATH: + { + struct lttng_rotate_get_current_path *get_return = NULL; + + ret = 
cmd_rotate_get_current_path(cmd_ctx->session, &get_return); + if (ret < 0) { + ret = -ret; + goto error; + } + + ret = setup_lttng_msg_no_cmd_header(cmd_ctx, get_return, + sizeof(struct lttng_rotate_get_current_path)); + free(get_return); + if (ret < 0) { + ret = -ret; + goto error; + } + + ret = LTTNG_OK; + break; + } default: ret = LTTNG_ERR_UND; break; diff --git a/src/bin/lttng-sessiond/rotate.c b/src/bin/lttng-sessiond/rotate.c index 05e9bb08a..49ccea94e 100644 --- a/src/bin/lttng-sessiond/rotate.c +++ b/src/bin/lttng-sessiond/rotate.c @@ -50,6 +50,38 @@ unsigned long hash_channel_key(struct rotation_channel_key *key) (void *) (unsigned long) key->domain, lttng_ht_seed); } +int rotate_add_channel_pending(uint64_t key, enum lttng_domain_type domain, + struct ltt_session *session) +{ + int ret; + struct rotation_channel_info *new_info; + struct rotation_channel_key channel_key = { .key = key, + .domain = domain }; + + new_info = zmalloc(sizeof(struct rotation_channel_info)); + if (!new_info) { + goto error; + } + + new_info->channel_key.key = key; + new_info->channel_key.domain = domain; + new_info->session_id = session->id; + cds_lfht_node_init(&new_info->rotate_channels_ht_node); + + session->nr_chan_rotate_pending++; + cds_lfht_add(channel_pending_rotate_ht, + hash_channel_key(&channel_key), + &new_info->rotate_channels_ht_node); + + ret = 0; + goto end; + +error: + ret = -1; +end: + return ret; +} + /* The session's lock must be held by the caller. */ static int session_rename_chunk(struct ltt_session *session, char *current_path, diff --git a/src/bin/lttng-sessiond/rotate.h b/src/bin/lttng-sessiond/rotate.h index b2f006f3e..e2c0829bc 100644 --- a/src/bin/lttng-sessiond/rotate.h +++ b/src/bin/lttng-sessiond/rotate.h @@ -48,4 +48,14 @@ unsigned long hash_channel_key(struct rotation_channel_key *key); /* session lock must be held by this function's caller. 
*/ int rename_complete_chunk(struct ltt_session *session, time_t ts); +/* + * When we start the rotation of a channel, we add its information in + * channel_pending_rotate_ht. This is called in the context of + * thread_manage_client when the client asks for a rotation, in the context + * of the sessiond_timer thread when periodic rotations are enabled and from + * the rotation_thread when size-based rotations are enabled. + */ +int rotate_add_channel_pending(uint64_t key, enum lttng_domain_type domain, + struct ltt_session *session); + #endif /* ROTATE_H */ diff --git a/src/bin/lttng-sessiond/rotation-thread.c b/src/bin/lttng-sessiond/rotation-thread.c index 1704777a0..a1471e3c4 100644 --- a/src/bin/lttng-sessiond/rotation-thread.c +++ b/src/bin/lttng-sessiond/rotation-thread.c @@ -33,6 +33,7 @@ #include #include +#include #include "rotation-thread.h" #include "lttng-sessiond.h" diff --git a/src/bin/lttng-sessiond/session.c b/src/bin/lttng-sessiond/session.c index 5a10340a7..942d68c13 100644 --- a/src/bin/lttng-sessiond/session.c +++ b/src/bin/lttng-sessiond/session.c @@ -328,7 +328,7 @@ int session_destroy(struct ltt_session *session) /* Safety check */ assert(session); - DBG("Destroying session %s", session->name); + DBG("Destroying session %s (id %" PRIu64 ")", session->name, session->id); del_session_list(session); pthread_mutex_destroy(&session->lock); del_session_ht(session); @@ -400,6 +400,9 @@ int session_create(char *name, uid_t uid, gid_t gid) goto error; } + new_session->rotate_pending = false; + new_session->rotate_pending_relay = false; + /* Add new session to the session list */ session_lock_list(); new_session->id = add_session_list(new_session); diff --git a/src/bin/lttng-sessiond/session.h b/src/bin/lttng-sessiond/session.h index 3b3380c3f..78890db3c 100644 --- a/src/bin/lttng-sessiond/session.h +++ b/src/bin/lttng-sessiond/session.h @@ -19,6 +19,7 @@ #define _LTT_SESSION_H #include +#include #include #include @@ -127,6 +128,10 @@ struct 
ltt_session {
 	 * rotate_pending_relay.
 	 */
 	bool rotate_pending;
+	/*
+	 * True until the relay has finished the rotation of all the streams.
+	 */
+	bool rotate_pending_relay;
 	/* Current status of a rotation. */
 	enum lttng_rotation_status rotation_status;
 	/*
@@ -162,6 +167,13 @@ struct ltt_session {
 	 * with the current timestamp.
 	 */
 	time_t current_chunk_start_ts;
+	/*
+	 * Keep a state if this session was rotated after the last stop command.
+	 * We only allow one rotation after a stop. At destroy, we also need to
+	 * know if a rotation occurred since the last stop to rename the current
+	 * chunk.
+	 */
+	bool rotated_after_last_stop;
 };
 
 /* Prototypes */
diff --git a/src/bin/lttng-sessiond/ust-app.c b/src/bin/lttng-sessiond/ust-app.c
index 0db0eb173..38aeaab60 100644
--- a/src/bin/lttng-sessiond/ust-app.c
+++ b/src/bin/lttng-sessiond/ust-app.c
@@ -43,6 +43,7 @@
 #include "session.h"
 #include "lttng-sessiond.h"
 #include "notification-thread-commands.h"
+#include "rotate.h"
 
 static int ust_app_flush_app_session(struct ust_app *app,
 		struct ust_app_session *ua_sess);
@@ -6288,3 +6289,212 @@ int ust_app_regenerate_statedump_all(struct ltt_ust_session *usess)
 
 	return 0;
 }
+
+/*
+ * Rotate all the channels of the UST domain of a session.
+ *
+ * For each per-UID buffer registry (LTTNG_BUFFER_PER_UID) or each application
+ * bound to the session (LTTNG_BUFFER_PER_PID), every channel is accounted with
+ * rotate_add_channel_pending() and the consumer is asked to rotate it. The
+ * metadata channel is accounted first and rotated last.
+ *
+ * *ust_active is set to true after a registry/application has been processed;
+ * it is left untouched otherwise.
+ *
+ * Return LTTNG_OK on success. On error, return either a negative value or a
+ * positive LTTNG_ERR_* code.
+ */
+int ust_app_rotate_session(struct ltt_session *session, bool *ust_active)
+{
+	int ret = 0;
+	struct lttng_ht_iter iter;
+	struct ust_app *app;
+	struct ltt_ust_session *usess = session->ust_session;
+	char pathname[LTTNG_PATH_MAX];
+
+	assert(usess);
+
+	rcu_read_lock();
+
+	switch (usess->buffer_type) {
+	case LTTNG_BUFFER_PER_UID:
+	{
+		struct buffer_reg_uid *reg;
+
+		cds_list_for_each_entry(reg, &usess->buffer_reg_uid_list, lnode) {
+			struct buffer_reg_channel *reg_chan;
+			struct consumer_socket *socket;
+
+			/* Get consumer socket to use to push the metadata. */
+			socket = consumer_find_socket_by_bitness(reg->bits_per_long,
+					usess->consumer);
+			if (!socket) {
+				ret = -EINVAL;
+				goto error;
+			}
+
+			/*
+			 * Account the metadata channel first to make sure the
+			 * number of channels waiting for a rotation cannot
+			 * reach 0 before we complete the iteration over all
+			 * the channels.
+			 */
+			ret = rotate_add_channel_pending(
+					reg->registry->reg.ust->metadata_key,
+					LTTNG_DOMAIN_UST, session);
+			if (ret < 0) {
+				ret = LTTNG_ERR_KERN_CONSUMER_FAIL;
+				goto error;
+			}
+
+			ret = snprintf(pathname, sizeof(pathname),
+					DEFAULT_UST_TRACE_DIR "/" DEFAULT_UST_TRACE_UID_PATH,
+					reg->uid, reg->bits_per_long);
+			/*
+			 * snprintf() returns the length the complete string
+			 * would have; a value >= sizeof(pathname) therefore
+			 * means the path was truncated.
+			 */
+			if (ret < 0 || (size_t) ret >= sizeof(pathname)) {
+				ERR("Failed to format rotation path");
+				ret = -1;
+				goto error;
+			}
+
+			/* Rotate the data channels. */
+			cds_lfht_for_each_entry(reg->registry->channels->ht, &iter.iter,
+					reg_chan, node.node) {
+				ret = rotate_add_channel_pending(
+						reg_chan->consumer_key,
+						LTTNG_DOMAIN_UST, session);
+				if (ret < 0) {
+					ret = LTTNG_ERR_KERN_CONSUMER_FAIL;
+					goto error;
+				}
+				ret = consumer_rotate_channel(socket,
+						reg_chan->consumer_key,
+						usess->uid, usess->gid,
+						usess->consumer, pathname,
+						/* is_metadata_channel */ false,
+						session->rotate_count,
+						&session->rotate_pending_relay);
+				if (ret < 0) {
+					goto error;
+				}
+			}
+
+			(void) push_metadata(reg->registry->reg.ust, usess->consumer);
+
+			ret = consumer_rotate_channel(socket,
+					reg->registry->reg.ust->metadata_key,
+					usess->uid, usess->gid,
+					usess->consumer, pathname,
+					/* is_metadata_channel */ true,
+					session->rotate_count,
+					&session->rotate_pending_relay);
+			if (ret < 0) {
+				goto error;
+			}
+			*ust_active = true;
+		}
+		break;
+	}
+	case LTTNG_BUFFER_PER_PID:
+	{
+		cds_lfht_for_each_entry(ust_app_ht->ht, &iter.iter, app, pid_n.node) {
+			struct consumer_socket *socket;
+			struct lttng_ht_iter chan_iter;
+			struct ust_app_channel *ua_chan;
+			struct ust_app_session *ua_sess;
+			struct ust_registry_session *registry;
+
+			ua_sess = lookup_session_by_app(usess, app);
+			if (!ua_sess) {
+				/* Session not associated with this app. */
+				continue;
+			}
+			ret = snprintf(pathname, sizeof(pathname),
+					DEFAULT_UST_TRACE_DIR "/%s",
+					ua_sess->path);
+			/* A result >= sizeof(pathname) means truncation. */
+			if (ret < 0 || (size_t) ret >= sizeof(pathname)) {
+				ERR("Failed to format rotation path");
+				ret = -1;
+				goto error;
+			}
+
+			/* Get the right consumer socket for the application. */
+			socket = consumer_find_socket_by_bitness(app->bits_per_long,
+					usess->consumer);
+			if (!socket) {
+				ret = -EINVAL;
+				goto error;
+			}
+
+			registry = get_session_registry(ua_sess);
+			if (!registry) {
+				DBG("Application session is being torn down. Abort rotation.");
+				ret = -1;
+				goto error;
+			}
+
+			/*
+			 * Account the metadata channel first to make sure the
+			 * number of channels waiting for a rotation cannot
+			 * reach 0 before we complete the iteration over all
+			 * the channels.
+			 */
+			ret = rotate_add_channel_pending(registry->metadata_key,
+					LTTNG_DOMAIN_UST, session);
+			if (ret < 0) {
+				ret = LTTNG_ERR_KERN_CONSUMER_FAIL;
+				goto error;
+			}
+
+			/* Rotate the data channels. */
+			cds_lfht_for_each_entry(ua_sess->channels->ht, &chan_iter.iter,
+					ua_chan, node.node) {
+				ret = rotate_add_channel_pending(
+						ua_chan->key, LTTNG_DOMAIN_UST,
+						session);
+				if (ret < 0) {
+					ret = LTTNG_ERR_KERN_CONSUMER_FAIL;
+					goto error;
+				}
+				ret = consumer_rotate_channel(socket, ua_chan->key,
+						ua_sess->euid, ua_sess->egid,
+						ua_sess->consumer, pathname,
+						/* is_metadata_channel */ false,
+						session->rotate_count,
+						&session->rotate_pending_relay);
+				if (ret < 0) {
+					goto error;
+				}
+			}
+
+			/* Rotate the metadata channel. */
+			(void) push_metadata(registry, usess->consumer);
+			ret = consumer_rotate_channel(socket, registry->metadata_key,
+					ua_sess->euid, ua_sess->egid,
+					ua_sess->consumer, pathname,
+					/* is_metadata_channel */ true,
+					session->rotate_count,
+					&session->rotate_pending_relay);
+			if (ret < 0) {
+				goto error;
+			}
+			*ust_active = true;
+		}
+		break;
+	}
+	default:
+		assert(0);
+		break;
+	}
+
+	ret = LTTNG_OK;
+
+error:
+	rcu_read_unlock();
+	return ret;
+}
diff --git a/src/bin/lttng-sessiond/ust-app.h b/src/bin/lttng-sessiond/ust-app.h
index 1b36496da..c3e37e071 100644
--- a/src/bin/lttng-sessiond/ust-app.h
+++ b/src/bin/lttng-sessiond/ust-app.h
@@ -25,6 +25,7 @@
 
 #include "trace-ust.h"
 #include "ust-registry.h"
+#include "session.h"
 
 #define UST_APP_EVENT_LIST_SIZE 32
 
@@ -355,6 +356,7 @@ int ust_app_pid_get_channel_runtime_stats(struct ltt_ust_session *usess,
 		struct consumer_output *consumer, int overwrite,
 		uint64_t *discarded, uint64_t *lost);
 int ust_app_regenerate_statedump_all(struct ltt_ust_session *usess);
+int ust_app_rotate_session(struct ltt_session *session, bool *ust_active); static inline int ust_app_supported(void) @@ -588,6 +590,12 @@ int ust_app_regenerate_statedump_all(struct ltt_ust_session *usess) return 0; } +static inline +int ust_app_rotate_session(struct ltt_session *session, bool *ust_active) +{ + return 0; /* NOTE(review): no-op stub, presumably the fallback used when HAVE_LIBLTTNG_UST_CTL is not defined; rotates nothing and never writes *ust_active. */ +} + #endif /* HAVE_LIBLTTNG_UST_CTL */ #endif /* _LTT_UST_APP_H */ diff --git a/src/common/consumer/consumer.h b/src/common/consumer/consumer.h index fb535fba6..af36de11c 100644 --- a/src/common/consumer/consumer.h +++ b/src/common/consumer/consumer.h @@ -65,6 +65,7 @@ enum lttng_consumer_command { LTTNG_CONSUMER_SET_CHANNEL_ROTATE_PIPE, LTTNG_CONSUMER_ROTATE_CHANNEL, LTTNG_CONSUMER_ROTATE_RENAME, + LTTNG_CONSUMER_ROTATE_PENDING_RELAY, LTTNG_CONSUMER_MKDIR, }; @@ -828,6 +829,8 @@ int lttng_consumer_rotate_ready_streams(uint64_t key, struct lttng_consumer_local_data *ctx); int lttng_consumer_rotate_rename(const char *current_path, const char *new_path, uid_t uid, gid_t gid, uint64_t relayd_id); +int lttng_consumer_rotate_pending_relay( uint64_t session_id, + uint64_t relayd_id, uint64_t chunk_id); void lttng_consumer_reset_stream_rotate_state(struct lttng_consumer_stream *stream); int lttng_consumer_mkdir(const char *path, uid_t uid, gid_t gid, uint64_t relayd_id); diff --git a/src/common/error.c b/src/common/error.c index db3042318..c859eeca5 100644 --- a/src/common/error.c +++ b/src/common/error.c @@ -190,6 +190,12 @@ static const char *error_string_array[] = { [ ERROR_INDEX(LTTNG_ERR_TRIGGER_EXISTS) ] = "Trigger already registered", [ ERROR_INDEX(LTTNG_ERR_TRIGGER_NOT_FOUND) ] = "Trigger not found", [ ERROR_INDEX(LTTNG_ERR_COMMAND_CANCELLED) ] = "Command cancelled", + [ ERROR_INDEX(LTTNG_ERR_ROTATION_PENDING) ] = "Rotation already pending for this session", + [ ERROR_INDEX(LTTNG_ERR_ROTATION_NOT_AVAILABLE) ] = "Rotation feature not available for this session's creation mode", + [ ERROR_INDEX(LTTNG_ERR_ROTATION_TIMER_IS_SET) ] = "Automatic
rotation schedule with a timer condition already set for this session", + [ ERROR_INDEX(LTTNG_ERR_ROTATION_SIZE_IS_SET) ] = "Automatic rotation schedule with a size threshold condition already set for this session", + [ ERROR_INDEX(LTTNG_ERR_ROTATION_MULTIPLE_AFTER_STOP) ] = "Session was already rotated once since it became inactive", + [ ERROR_INDEX(LTTNG_ERR_ROTATION_WRONG_VERSION) ] = "Rotation feature is not supported by this kernel tracer version", /* Last element */ [ ERROR_INDEX(LTTNG_ERR_NR) ] = "Unknown error code" diff --git a/src/common/sessiond-comm/sessiond-comm.h b/src/common/sessiond-comm/sessiond-comm.h index 1b13daec4..e931c6931 100644 --- a/src/common/sessiond-comm/sessiond-comm.h +++ b/src/common/sessiond-comm/sessiond-comm.h @@ -100,6 +100,7 @@ enum lttcomm_sessiond_command { LTTNG_REGENERATE_STATEDUMP = 42, LTTNG_REGISTER_TRIGGER = 43, LTTNG_UNREGISTER_TRIGGER = 44, + LTTNG_ROTATE_SESSION = 45, }; enum lttcomm_relayd_command { diff --git a/tests/unit/test_ust_data.c b/tests/unit/test_ust_data.c index 43ad3115b..ac3350d79 100644 --- a/tests/unit/test_ust_data.c +++ b/tests/unit/test_ust_data.c @@ -63,6 +63,19 @@ static char random_string[RANDOM_STRING_LEN]; static struct ltt_ust_session *usess; static struct lttng_domain dom; +/* + * Stub to prevent an undefined reference in this test without having to link + * the entire tree because of a cascade of dependencies. This is not used, + * it is just there to prevent GCC from complaining. + */ +int rotate_add_channel_pending(uint64_t key, enum lttng_domain_type domain, + struct ltt_session *session) +{ + ERR("Stub called instead of the real function"); + abort(); + return -1; /* Not reached: abort() above does not return. */ +} + /* * Return random string of 10 characters. * Not thread-safe. -- 2.34.1