#include <inttypes.h>
#include <urcu/list.h>
#include <urcu/uatomic.h>
+#include <sys/stat.h>
#include <common/defaults.h>
#include <common/common.h>
#include "kernel-consumer.h"
#include "lttng-sessiond.h"
#include "utils.h"
-#include "syscall.h"
+#include "lttng-syscall.h"
#include "agent.h"
#include "buffer-registry.h"
#include "notification-thread.h"
#include "rotate.h"
#include "rotation-thread.h"
#include "sessiond-timer.h"
+#include "agent-thread.h"
#include "cmd.h"
+/* Sleep for 100ms between each check for the shm path's deletion. */
+#define SESSION_DESTROY_SHM_PATH_CHECK_DELAY_US 100000
+
+static enum lttng_error_code wait_on_path(void *path);
+
+/*
+ * Command completion handler that is used by the destroy command
+ * when a session that has a non-default shm_path is being destroyed.
+ *
+ * The handler's run callback is wait_on_path() and its data argument
+ * points at this object's own shm_path buffer. cmd_destroy_session()
+ * registers this handler as the current completion handler and copies
+ * the session's shm_path into that buffer, so the path stays available
+ * to the callback independently of the session object.
+ *
+ * See comment in cmd_destroy_session() for the rationale.
+ */
+static struct destroy_completion_handler {
+ struct cmd_completion_handler handler;
+ char shm_path[member_sizeof(struct ltt_session, shm_path)]; /* Sized like the shm_path member of struct ltt_session. */
+} destroy_completion_handler = {
+ .handler = {
+ .run = wait_on_path,
+ .data = destroy_completion_handler.shm_path /* Self-reference: the buffer member of this same object. */
+ },
+ .shm_path = { 0 },
+};
+
+static struct cmd_completion_handler *current_completion_handler; /* Set by cmd_destroy_session(); taken and reset to NULL by cmd_pop_completion_handler(). */
+
/*
* Used to keep a unique index for each relayd socket created where this value
* is associated with streams on the consumer so it can match the right relayd
*
* The consumer socket lock must be held by the caller.
*/
-static int send_consumer_relayd_socket(enum lttng_domain_type domain,
- unsigned int session_id, struct lttng_uri *relayd_uri,
+static int send_consumer_relayd_socket(unsigned int session_id,
+ struct lttng_uri *relayd_uri,
struct consumer_output *consumer,
struct consumer_socket *consumer_sock,
char *session_name, char *hostname, int session_live_timer)
/* Sending control relayd socket. */
if (!sock->control_sock_sent) {
- ret = send_consumer_relayd_socket(domain, session_id,
+ ret = send_consumer_relayd_socket(session_id,
&consumer->dst.net.control, consumer, sock,
session_name, hostname, session_live_timer);
if (ret != LTTNG_OK) {
/* Sending data relayd socket. */
if (!sock->data_sock_sent) {
- ret = send_consumer_relayd_socket(domain, session_id,
+ ret = send_consumer_relayd_socket(session_id,
&consumer->dst.net.data, consumer, sock,
session_name, hostname, session_live_timer);
if (ret != LTTNG_OK) {
}
/* Quiescent wait after starting trace */
- kernel_wait_quiescent(kernel_tracer_fd);
+ kernel_wait_quiescent(wpipe);
ksess->active = 1;
break;
}
case LTTNG_DOMAIN_UST:
+ break;
case LTTNG_DOMAIN_JUL:
case LTTNG_DOMAIN_LOG4J:
case LTTNG_DOMAIN_PYTHON:
+ if (!agent_tracing_is_enabled()) {
+ DBG("Attempted to enable a channel in an agent domain but the agent thread is not running");
+ ret = LTTNG_ERR_AGENT_TRACING_DISABLED;
+ goto error;
+ }
break;
default:
ret = LTTNG_ERR_UNKNOWN_DOMAIN;
assert(usess);
+ if (!agent_tracing_is_enabled()) {
+ DBG("Attempted to enable an event in an agent domain but the agent thread is not running");
+ ret = LTTNG_ERR_AGENT_TRACING_DISABLED;
+ goto error;
+ }
+
agt = trace_ust_find_agent(usess, domain->type);
if (!agt) {
agt = agent_create(domain->type);
{
int ret;
- session->rotate_count++;
+ session->current_archive_id++;
/*
* The currently active tracing path is now the folder we
goto end;
}
end:
- session->rotate_count--;
+ session->current_archive_id--;
return ret;
}
sessiond_rotate_timer_stop(session);
}
- if (session->rotate_count > 0 && !session->rotate_pending) {
+ if (session->current_archive_id > 0 && !session->rotate_pending) {
ret = rename_active_chunk(session);
if (ret) {
/*
*
* Called with session lock held.
*/
-int cmd_destroy_session(struct ltt_session *session, int wpipe)
+int cmd_destroy_session(struct ltt_session *session, int wpipe,
+ struct notification_thread_handle *notification_thread_handle)
{
int ret;
struct ltt_ust_session *usess;
sessiond_rotate_timer_stop(session);
}
+ if (session->rotate_size) {
+ unsubscribe_session_consumed_size_rotation(session, notification_thread_handle);
+ session->rotate_size = 0;
+ }
+
/*
* The rename of the current chunk is performed at stop, but if we rotated
* the session after the previous stop command, we need to rename the
PERROR("write kernel poll pipe");
}
+ if (session->shm_path[0]) {
+ /*
+ * When a session is created with an explicit shm_path,
+ * the consumer daemon will create its shared memory files
+ * at that location and will *not* unlink them. This is normal
+ * as the intention of that feature is to make it possible
+ * to retrieve the content of those files should a crash occur.
+ *
+ * To ensure the content of those files can be used, the
+ * sessiond daemon will replicate the content of the metadata
+ * cache in a metadata file.
+ *
+ * On clean-up, it is expected that the consumer daemon will
+ * unlink the shared memory files and that the session daemon
+ * will unlink the metadata file. Then, the session's directory
+ * in the shm path can be removed.
+ *
+ * Unfortunately, a flaw in the design of the sessiond's and
+ * consumerd's tear down of channels makes it impossible to
+ * determine when the sessiond _and_ the consumerd have both
+ * destroyed their representation of a channel. For one, the
+ * unlinking, close, and rmdir happen in deferred 'call_rcu'
+ * callbacks in both daemons.
+ *
+ * However, it is also impossible for the sessiond to know when
+ * the consumer daemon is done destroying its channel(s) since
+ * it occurs as a reaction to the closing of the channel's file
+ * descriptor. There is no resulting communication initiated
+ * from the consumerd to the sessiond to confirm that the
+ * operation is completed (and was successful).
+ *
+ * Until this is all fixed, the session daemon checks for the
+ * removal of the session's shm path which makes it possible
+ * to safely advertise a session as having been destroyed.
+ *
+ * Prior to this fix, it was not possible to reliably save
+ * a session making use of the --shm-path option, destroy it,
+ * and load it again. This is because the creation of the
+ * session would fail upon seeing the session's shm path
+ * already in existence.
+ *
+ * Note that none of the error paths in the check for the
+ * directory's existence return an error. This is normal
+ * as there isn't much that can be done. The session will
+ * be destroyed properly, except that we can't offer the
+ * guarantee that the same session can be re-created.
+ */
+ current_completion_handler = &destroy_completion_handler.handler;
+ ret = lttng_strncpy(destroy_completion_handler.shm_path,
+ session->shm_path,
+ sizeof(destroy_completion_handler.shm_path));
+ assert(!ret);
+ }
ret = session_destroy(session);
return ret;
int clear_metadata_file(int fd)
{
int ret;
+ off_t lseek_ret;
- ret = lseek(fd, 0, SEEK_SET);
- if (ret < 0) {
+ lseek_ret = lseek(fd, 0, SEEK_SET);
+ if (lseek_ret < 0) {
PERROR("lseek");
+ ret = -1;
goto end;
}
if (session->consumer->type == CONSUMER_DST_NET &&
(session->consumer->relay_major_version == 2 &&
session->consumer->relay_minor_version < 11)) {
- ret = -LTTNG_ERR_ROTATION_NOT_AVAILABLE;
+ ret = -LTTNG_ERR_ROTATION_NOT_AVAILABLE_RELAY;
goto end;
}
}
/* Special case for the first rotation. */
- if (session->rotate_count == 0) {
+ if (session->current_archive_id == 0) {
const char *base_path = NULL;
/* Either one of the two sessions is enough to get the root path. */
}
DBG("Current rotate path %s", session->rotation_chunk.current_rotate_path);
- session->rotate_count++;
+ session->current_archive_id++;
session->rotate_pending = true;
session->rotation_state = LTTNG_ROTATION_STATE_ONGOING;
sizeof(session->rotation_chunk.active_tracing_path),
"%s/%s-%" PRIu64,
session_get_base_path(session),
- datetime, session->rotate_count + 1);
+ datetime, session->current_archive_id + 1);
if (ret < 0 || ret == sizeof(session->rotation_chunk.active_tracing_path)) {
ERR("Failed to format active kernel tracing path in rotate session command");
ret = -LTTNG_ERR_UNK;
ret = snprintf(session->kernel_session->consumer->chunk_path,
sizeof(session->kernel_session->consumer->chunk_path),
"/%s-%" PRIu64, datetime,
- session->rotate_count + 1);
+ session->current_archive_id + 1);
if (ret < 0 || ret == sizeof(session->kernel_session->consumer->chunk_path)) {
ERR("Failed to format the kernel consumer's sub-directory in rotate session command");
ret = -LTTNG_ERR_UNK;
ret = snprintf(session->rotation_chunk.active_tracing_path,
PATH_MAX, "%s/%s-%" PRIu64,
session_get_base_path(session),
- datetime, session->rotate_count + 1);
+ datetime, session->current_archive_id + 1);
if (ret < 0) {
ERR("Failed to format active UST tracing path in rotate session command");
ret = -LTTNG_ERR_UNK;
}
ret = snprintf(session->ust_session->consumer->chunk_path,
PATH_MAX, "/%s-%" PRIu64, datetime,
- session->rotate_count + 1);
+ session->current_archive_id + 1);
if (ret < 0) {
ERR("Failed to format the UST consumer's sub-directory in rotate session command");
ret = -LTTNG_ERR_UNK;
ret = domain_mkdir(session->ust_session->consumer, session,
session->ust_session->uid,
session->ust_session->gid);
+ if (ret) {
+ ret = -LTTNG_ERR_CREATE_DIR_FAIL;
+ goto end;
+ }
ret = ust_app_rotate_session(session, &ust_active);
if (ret != LTTNG_OK) {
- ret = -LTTNG_ERR_CREATE_DIR_FAIL;
goto end;
}
/*
}
if (rotate_return) {
- rotate_return->rotation_id = session->rotate_count;
+ rotate_return->rotation_id = session->current_archive_id;
}
- DBG("Cmd rotate session %s, rotate_id %" PRIu64 " sent", session->name,
- session->rotate_count);
+ DBG("Cmd rotate session %s, current_archive_id %" PRIu64 " sent",
+ session->name, session->current_archive_id);
ret = LTTNG_OK;
end:
assert(session);
DBG("Cmd rotate_get_info session %s, rotation id %" PRIu64, session->name,
- session->rotate_count);
+ session->current_archive_id);
- if (session->rotate_count != rotation_id) {
+ if (session->current_archive_id != rotation_id) {
info_return->status = (int32_t) LTTNG_ROTATION_STATE_EXPIRED;
ret = LTTNG_OK;
goto end;
rotation_id, session->name);
break;
case LTTNG_ROTATION_STATE_COMPLETED:
- ret = lttng_strncpy(info_return->path,
+ {
+ char *current_tracing_path_reply;
+ size_t current_tracing_path_reply_len;
+
+ switch (session_get_consumer_destination_type(session)) {
+ case CONSUMER_DST_LOCAL:
+ current_tracing_path_reply =
+ info_return->location.local.absolute_path;
+ current_tracing_path_reply_len =
+ sizeof(info_return->location.local.absolute_path);
+ info_return->location_type =
+ (int8_t) LTTNG_TRACE_ARCHIVE_LOCATION_TYPE_LOCAL;
+ break;
+ case CONSUMER_DST_NET:
+ current_tracing_path_reply =
+ info_return->location.relay.relative_path;
+ current_tracing_path_reply_len =
+ sizeof(info_return->location.relay.relative_path);
+ /* Currently the only supported relay protocol. */
+ info_return->location.relay.protocol =
+ (int8_t) LTTNG_TRACE_ARCHIVE_LOCATION_RELAY_PROTOCOL_TYPE_TCP;
+
+ ret = lttng_strncpy(info_return->location.relay.host,
+ session_get_net_consumer_hostname(session),
+ sizeof(info_return->location.relay.host));
+ if (ret) {
+ ERR("Failed to host name to rotate_get_info reply");
+ info_return->status = LTTNG_ROTATION_STATUS_ERROR;
+ ret = -LTTNG_ERR_UNK;
+ goto end;
+ }
+
+ session_get_net_consumer_ports(session,
+ &info_return->location.relay.ports.control,
+ &info_return->location.relay.ports.data);
+ info_return->location_type =
+ (int8_t) LTTNG_TRACE_ARCHIVE_LOCATION_TYPE_RELAY;
+ break;
+ default:
+ abort();
+ }
+ ret = lttng_strncpy(current_tracing_path_reply,
session->rotation_chunk.current_rotate_path,
- sizeof(info_return->path));
+ current_tracing_path_reply_len);
if (ret) {
- ERR("Failed to copy active tracing path to rotate_get_info reply");
+ ERR("Failed to copy current tracing path to rotate_get_info reply");
info_return->status = LTTNG_ROTATION_STATUS_ERROR;
ret = -LTTNG_ERR_UNK;
goto end;
}
+
break;
+ }
case LTTNG_ROTATION_STATE_ERROR:
DBG("Reporting that an error occurred during rotation %" PRIu64 " of session %s",
rotation_id, session->name);
* Return 0 on success or else an LTTNG_ERR code.
*/
int cmd_rotation_set_schedule(struct ltt_session *session,
- uint64_t timer_us, uint64_t size)
+ uint64_t timer_us, uint64_t size,
+ struct notification_thread_handle *notification_thread_handle)
{
int ret;
goto end;
}
+ if (size && size != -1ULL && session->rotate_size) {
+ ret = LTTNG_ERR_ROTATION_SIZE_SET;
+ goto end;
+ } else if (size == -1ULL && !session->rotate_size) {
+ ret = LTTNG_ERR_ROTATION_NO_SIZE_SET;
+ goto end;
+ }
+
if (timer_us && !session->rotate_timer_period) {
if (timer_us > UINT_MAX) {
ret = LTTNG_ERR_INVALID;
session->rotate_timer_period = 0;
}
+ if (size > 0) {
+ if (size == -1ULL) {
+ ret = unsubscribe_session_consumed_size_rotation(session,
+ notification_thread_handle);
+ if (ret) {
+ ret = LTTNG_ERR_UNK;
+ goto end;
+ }
+ session->rotate_size = 0;
+ } else {
+ ret = subscribe_session_consumed_size_rotation(session,
+ size, notification_thread_handle);
+ if (ret) {
+ PERROR("Subscribe to session usage");
+ ret = LTTNG_ERR_UNK;
+ goto end;
+ }
+ session->rotate_size = size;
+ }
+ }
+
ret = LTTNG_OK;
goto end;
const char *path;
if (!session->snapshot_mode) {
- if (session->rotate_count == 0) {
+ if (session->current_archive_id == 0) {
if (session->kernel_session) {
path = session_get_base_path(session);
} else if (session->ust_session) {
return ret;
}
+/*
+ * Completion callback: block until the shm path passed in 'path_data'
+ * (a NUL-terminated string) can no longer be stat()-ed as a directory.
+ *
+ * The path is polled, sleeping SESSION_DESTROY_SHM_PATH_CHECK_DELAY_US
+ * between two checks. The wait ends as soon as stat() fails (whatever
+ * the reason; only ENOENT is the expected outcome, other errors are
+ * logged) or when the path turns out not to be a directory.
+ *
+ * Always returns LTTNG_OK: a failed check is reported in the logs only.
+ */
+static enum lttng_error_code wait_on_path(void *path_data)
+{
+	const char *shm_path = path_data;
+	struct stat path_stat;
+
+	DBG("Waiting for the shm path at %s to be removed before completing session destruction",
+			shm_path);
+	for (;;) {
+		if (stat(shm_path, &path_stat) != 0) {
+			/* errno is inspected before any other call can clobber it. */
+			if (errno == ENOENT) {
+				DBG("shm path no longer exists, completing the destruction of session");
+			} else {
+				PERROR("stat() returned an error while checking for the existence of the shm path");
+			}
+			break;
+		}
+
+		if (!S_ISDIR(path_stat.st_mode)) {
+			ERR("The type of shm path %s returned by stat() is not a directory; aborting the wait for shm path removal",
+					shm_path);
+			break;
+		}
+
+		/* Still there: wait a little before looking again. */
+		usleep(SESSION_DESTROY_SHM_PATH_CHECK_DELAY_US);
+	}
+
+	return LTTNG_OK;
+}
+
+/*
+ * Take the completion handler left pending by the last executed command.
+ *
+ * The pending slot is cleared by this call, so a handler is handed out
+ * at most once. Returns NULL when the last command did not register any
+ * handler.
+ */
+const struct cmd_completion_handler *cmd_pop_completion_handler(void)
+{
+	struct cmd_completion_handler *pending = current_completion_handler;
+
+	/* Consume the handler: the next call reports nothing to run. */
+	current_completion_handler = NULL;
+
+	return pending;
+}
+
/*
* Init command subsystem.
*/