(void) relayd_close(&relayd->control_sock);
(void) relayd_close(&relayd->data_sock);
+ pthread_mutex_destroy(&relayd->ctrl_sock_mutex);
free(relayd);
}
* If a local data context is available, notify the threads that the streams'
* state have changed.
*/
-static void cleanup_relayd(struct consumer_relayd_sock_pair *relayd,
- struct lttng_consumer_local_data *ctx)
+void lttng_consumer_cleanup_relayd(struct consumer_relayd_sock_pair *relayd)
{
uint64_t netidx;
assert(relayd);
- DBG("Cleaning up relayd sockets");
+ DBG("Cleaning up relayd object ID %"PRIu64, relayd->net_seq_idx);
/* Save the net sequence index before destroying the object */
netidx = relayd->net_seq_idx;
* memory barrier ordering the updates of the end point status from the
* read of this status which happens AFTER receiving this notify.
*/
- if (ctx) {
- notify_thread_lttng_pipe(ctx->consumer_data_pipe);
- notify_thread_lttng_pipe(ctx->consumer_metadata_pipe);
- }
+ notify_thread_lttng_pipe(relayd->ctx->consumer_data_pipe);
+ notify_thread_lttng_pipe(relayd->ctx->consumer_metadata_pipe);
}
/*
int cpu,
int *alloc_ret,
enum consumer_channel_type type,
- unsigned int monitor)
+ unsigned int monitor,
+ uint64_t trace_archive_id)
{
int ret;
struct lttng_consumer_stream *stream;
stream->endpoint_status = CONSUMER_ENDPOINT_ACTIVE;
stream->index_file = NULL;
stream->last_sequence_number = -1ULL;
+ stream->trace_archive_id = trace_archive_id;
pthread_mutex_init(&stream->lock, NULL);
pthread_mutex_init(&stream->metadata_timer_lock, NULL);
pthread_mutex_lock(&relayd->ctrl_sock_mutex);
ret = relayd_add_stream(&relayd->control_sock, stream->name,
path, &stream->relayd_stream_id,
- stream->chan->tracefile_size, stream->chan->tracefile_count);
+ stream->chan->tracefile_size, stream->chan->tracefile_count,
+ stream->trace_archive_id);
pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
if (ret < 0) {
+ ERR("Relayd add stream failed. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx);
+ lttng_consumer_cleanup_relayd(relayd);
goto end;
}
ret = relayd_streams_sent(&relayd->control_sock);
pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
if (ret < 0) {
+ ERR("Relayd streams sent failed. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx);
+ lttng_consumer_cleanup_relayd(relayd);
goto end;
}
} else {
*/
static int write_relayd_metadata_id(int fd,
struct lttng_consumer_stream *stream,
- struct consumer_relayd_sock_pair *relayd, unsigned long padding)
+ unsigned long padding)
{
ssize_t ret;
struct lttcomm_relayd_metadata_payload hdr;
/* Write metadata stream id before payload */
if (stream->metadata_flag) {
- ret = write_relayd_metadata_id(outfd, stream, relayd, padding);
+ ret = write_relayd_metadata_id(outfd, stream, padding);
if (ret < 0) {
relayd_hang_up = 1;
goto write_error;
* cleanup the relayd object and all associated streams.
*/
if (relayd && relayd_hang_up) {
- cleanup_relayd(relayd, ctx);
+ ERR("Relayd hangup. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx);
+ lttng_consumer_cleanup_relayd(relayd);
}
end:
}
stream->reset_metadata_flag = 0;
}
- ret = write_relayd_metadata_id(splice_pipe[1], stream, relayd,
+ ret = write_relayd_metadata_id(splice_pipe[1], stream,
padding);
if (ret < 0) {
written = ret;
* cleanup the relayd object and all associated streams.
*/
if (relayd && relayd_hang_up) {
- cleanup_relayd(relayd, ctx);
+ ERR("Relayd hangup. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx);
+ lttng_consumer_cleanup_relayd(relayd);
/* Skip splice error so the consumer does not fail */
goto end;
}
rcu_read_unlock();
}
-static
-int rotate_notify_sessiond(struct lttng_consumer_local_data *ctx,
- uint64_t key)
-{
- ssize_t ret;
-
- do {
- ret = write(ctx->channel_rotate_pipe, &key, sizeof(key));
- } while (ret == -1 && errno == EINTR);
- if (ret == -1) {
- PERROR("Failed to write to the channel rotation pipe");
- } else {
- DBG("Sent channel rotation notification for channel key %"
- PRIu64, key);
- ret = 0;
- }
-
- return (int) ret;
-}
-
/*
* Perform operations that need to be done after a stream has
* rotated and released the stream lock.
abort();
}
- if (--stream->chan->nr_stream_rotate_pending == 0) {
- DBG("Rotation of channel \"%s\" completed, notifying the session daemon",
- stream->chan->name);
- ret = rotate_notify_sessiond(ctx, stream->chan->key);
- }
- assert(stream->chan->nr_stream_rotate_pending >= 0);
pthread_mutex_unlock(&stream->chan->lock);
-
return ret;
}
/* local view of the streams */
struct lttng_consumer_stream **local_stream = NULL, *new_stream = NULL;
/* local view of consumer_data.fds_count */
- int nb_fd = 0, nb_pipes_fd;
+ int nb_fd = 0;
+ /* 2 for the consumer_data_pipe and wake up pipe */
+ const int nb_pipes_fd = 2;
/* Number of FDs with CONSUMER_ENDPOINT_INACTIVE but still open. */
int nb_inactive_fd = 0;
struct lttng_consumer_local_data *ctx = data;
free(local_stream);
local_stream = NULL;
- /*
- * Allocate for all fds + 2:
- * +1 for the consumer_data_pipe
- * +1 for wake up pipe
- */
- nb_pipes_fd = 2;
+ /* Allocate for all stream fds plus the nb_pipes_fd pipe fds */
pollfd = zmalloc((consumer_data.stream_count + nb_pipes_fd) * sizeof(struct pollfd));
if (pollfd == NULL) {
PERROR("pollfd malloc");
* Add relayd socket pair to consumer data hashtable. If object already
* exists or on error, the function gracefully returns.
*/
+ relayd->ctx = ctx;
add_relayd(relayd);
/* All good! */
}
}
-/*
- * Try to lock the stream mutex.
- *
- * On success, 1 is returned else 0 indicating that the mutex is NOT lock.
- */
-static int stream_try_lock(struct lttng_consumer_stream *stream)
-{
- int ret;
-
- assert(stream);
-
- /*
- * Try to lock the stream mutex. On failure, we know that the stream is
- * being used else where hence there is data still being extracted.
- */
- ret = pthread_mutex_trylock(&stream->lock);
- if (ret) {
- /* For both EBUSY and EINVAL error, the mutex is NOT locked. */
- ret = 0;
- goto end;
- }
-
- ret = 1;
-
-end:
- return ret;
-}
-
/*
* Search for a relayd associated to the session id and return the reference.
*
/* Ease our life a bit */
ht = consumer_data.stream_list_ht;
- relayd = find_relayd_by_session_id(id);
- if (relayd) {
- /* Send init command for data pending. */
- pthread_mutex_lock(&relayd->ctrl_sock_mutex);
- ret = relayd_begin_data_pending(&relayd->control_sock,
- relayd->relayd_session_id);
- pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
- if (ret < 0) {
- /* Communication error thus the relayd so no data pending. */
- goto data_not_pending;
- }
- }
-
cds_lfht_for_each_entry_duplicate(ht->ht,
ht->hash_fct(&id, lttng_ht_seed),
ht->match_fct, &id,
&iter.iter, stream, node_session_id.node) {
- /* If this call fails, the stream is being used hence data pending. */
- ret = stream_try_lock(stream);
- if (!ret) {
- goto data_pending;
- }
+ pthread_mutex_lock(&stream->lock);
/*
* A removed node from the hash table indicates that the stream has
}
}
- /* Relayd check */
- if (relayd) {
- pthread_mutex_lock(&relayd->ctrl_sock_mutex);
+ pthread_mutex_unlock(&stream->lock);
+ }
+
+ relayd = find_relayd_by_session_id(id);
+ if (relayd) {
+ unsigned int is_data_inflight = 0;
+
+ /* Send init command for data pending. */
+ pthread_mutex_lock(&relayd->ctrl_sock_mutex);
+ ret = relayd_begin_data_pending(&relayd->control_sock,
+ relayd->relayd_session_id);
+ if (ret < 0) {
+ pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
+ /* Communication error with the relayd, so no data is pending. */
+ goto data_not_pending;
+ }
+
+ cds_lfht_for_each_entry_duplicate(ht->ht,
+ ht->hash_fct(&id, lttng_ht_seed),
+ ht->match_fct, &id,
+ &iter.iter, stream, node_session_id.node) {
if (stream->metadata_flag) {
ret = relayd_quiescent_control(&relayd->control_sock,
stream->relayd_stream_id);
stream->relayd_stream_id,
stream->next_net_seq_num - 1);
}
- pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
+
if (ret == 1) {
- pthread_mutex_unlock(&stream->lock);
+ pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
goto data_pending;
+ } else if (ret < 0) {
+ ERR("Relayd data pending failed. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx);
+ lttng_consumer_cleanup_relayd(relayd);
+ pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
+ goto data_not_pending;
}
}
- pthread_mutex_unlock(&stream->lock);
- }
- if (relayd) {
- unsigned int is_data_inflight = 0;
-
- /* Send init command for data pending. */
- pthread_mutex_lock(&relayd->ctrl_sock_mutex);
+ /* Send end command for data pending. */
ret = relayd_end_data_pending(&relayd->control_sock,
relayd->relayd_session_id, &is_data_inflight);
pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
if (ret < 0) {
+ ERR("Relayd end data pending failed. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx);
+ lttng_consumer_cleanup_relayd(relayd);
goto data_not_pending;
}
if (is_data_inflight) {
* is already at the rotate position (produced == consumed), we flag it as
* ready for rotation. The rotation of ready streams occurs after we have
* replied to the session daemon that we have finished sampling the positions.
+ * Must be called with RCU read-side lock held to ensure existence of channel.
*
* Returns 0 on success, < 0 on error
*/
-int lttng_consumer_rotate_channel(uint64_t key, const char *path,
- uint64_t relayd_id, uint32_t metadata, uint64_t new_chunk_id,
+int lttng_consumer_rotate_channel(struct lttng_consumer_channel *channel,
+ uint64_t key, const char *path, uint64_t relayd_id,
+ uint32_t metadata, uint64_t new_chunk_id,
struct lttng_consumer_local_data *ctx)
{
int ret;
- struct lttng_consumer_channel *channel;
struct lttng_consumer_stream *stream;
struct lttng_ht_iter iter;
struct lttng_ht *ht = consumer_data.stream_per_chan_id_ht;
rcu_read_lock();
- channel = consumer_find_channel(key);
- if (!channel) {
- ERR("No channel found for key %" PRIu64, key);
- ret = -1;
- goto end;
- }
-
pthread_mutex_lock(&channel->lock);
channel->current_chunk_id = new_chunk_id;
if (consumed_pos == stream->rotate_position) {
stream->rotate_ready = true;
}
- channel->nr_stream_rotate_pending++;
ret = consumer_flush_buffer(stream, 1);
if (ret < 0) {
stream->chan->current_chunk_id,
stream->last_sequence_number);
pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
+ if (ret < 0) {
+ ERR("Relayd rotate stream failed. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx);
+ lttng_consumer_cleanup_relayd(relayd);
+ }
if (ret) {
ERR("Rotate relay stream");
}
} else {
ret = rotate_local_stream(ctx, stream);
}
+ stream->trace_archive_id++;
if (ret < 0) {
- ERR("Rotate stream");
+ ERR("Failed to rotate stream, ret = %i", ret);
goto error;
}
* This is especially important for low throughput streams that have already
* been consumed, we cannot wait for their next packet to perform the
* rotation.
+ * Must be called with the RCU read-side lock held to ensure the existence of
+ * the channel.
*
* Returns 0 on success, < 0 on error
*/
-int lttng_consumer_rotate_ready_streams(uint64_t key,
- struct lttng_consumer_local_data *ctx)
+int lttng_consumer_rotate_ready_streams(struct lttng_consumer_channel *channel,
+ uint64_t key, struct lttng_consumer_local_data *ctx)
{
int ret;
- struct lttng_consumer_channel *channel;
struct lttng_consumer_stream *stream;
struct lttng_ht_iter iter;
struct lttng_ht *ht = consumer_data.stream_per_chan_id_ht;
DBG("Consumer rotate ready streams in channel %" PRIu64, key);
- channel = consumer_find_channel(key);
- if (!channel) {
- ERR("No channel found for key %" PRIu64, key);
- ret = -1;
- goto end;
- }
-
cds_lfht_for_each_entry_duplicate(ht->ht,
ht->hash_fct(&channel->key, lttng_ht_seed),
ht->match_fct, &channel->key, &iter.iter,
pthread_mutex_lock(&relayd->ctrl_sock_mutex);
ret = relayd_rotate_rename(&relayd->control_sock, old_path, new_path);
+ if (ret < 0) {
+ ERR("Relayd rotate rename failed. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx);
+ lttng_consumer_cleanup_relayd(relayd);
+ }
pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
end:
return ret;
}
}
-int lttng_consumer_rotate_pending_relay(uint64_t session_id,
+/*
+ * Tell whether a rotation is still pending on a given stream.
+ *
+ * A stream that does not belong to `session_id` is never reported as pending.
+ * For a stream of that session, the rotation is pending while the stream's
+ * trace_archive_id is lower than or equal to `chunk_id`.
+ *
+ * Stream lock must be acquired by the caller.
+ */
+static
+bool check_stream_rotation_pending(const struct lttng_consumer_stream *stream,
+		uint64_t session_id, uint64_t chunk_id)
+{
+	if (stream->session_id != session_id) {
+		/* Stream of another session: skip it. */
+		return false;
+	}
+
+	/*
+	 * If the stream's archive_id belongs to the chunk being rotated (or an
+	 * even older one), it means that the consumer has not consumed all the
+	 * buffers that belong to the chunk being rotated. Therefore, the
+	 * rotation is considered as ongoing/pending.
+	 */
+	return stream->trace_archive_id <= chunk_id;
+}
+
+/*
+ * Check whether any stream of `session_id` still has a rotation pending for
+ * `chunk_id`. Every stream of metadata_ht is examined first, then every
+ * stream of data_ht; the scan stops at the first pending stream found. Each
+ * stream is examined with its lock held.
+ *
+ * Returns 1 if a rotation is pending on at least one stream, 0 otherwise.
+ *
+ * RCU read lock must be acquired by the caller.
+ */
+int lttng_consumer_check_rotation_pending_local(uint64_t session_id,
+		uint64_t chunk_id)
+{
+	struct lttng_ht_iter iter;
+	struct lttng_consumer_stream *stream;
+	bool found_pending = false;
+
+	/* Metadata streams come first... */
+	cds_lfht_for_each_entry(metadata_ht->ht, &iter.iter, stream, node.node) {
+		pthread_mutex_lock(&stream->lock);
+		found_pending = check_stream_rotation_pending(stream,
+				session_id, chunk_id);
+		pthread_mutex_unlock(&stream->lock);
+		if (found_pending) {
+			break;
+		}
+	}
+
+	/* ... and data streams are only scanned if nothing was found yet. */
+	if (!found_pending) {
+		cds_lfht_for_each_entry(data_ht->ht, &iter.iter, stream,
+				node.node) {
+			pthread_mutex_lock(&stream->lock);
+			found_pending = check_stream_rotation_pending(stream,
+					session_id, chunk_id);
+			pthread_mutex_unlock(&stream->lock);
+			if (found_pending) {
+				break;
+			}
+		}
+	}
+
+	return found_pending ? 1 : 0;
+}
+
+int lttng_consumer_check_rotation_pending_relay(uint64_t session_id,
uint64_t relayd_id, uint64_t chunk_id)
{
int ret;
relayd = consumer_find_relayd(relayd_id);
if (!relayd) {
- ERR("Failed to find relayd");
+ ERR("Failed to find relayd id %" PRIu64, relayd_id);
ret = -1;
goto end;
}
pthread_mutex_lock(&relayd->ctrl_sock_mutex);
ret = relayd_rotate_pending(&relayd->control_sock, chunk_id);
+ if (ret < 0) {
+ ERR("Relayd rotate pending failed. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx);
+ lttng_consumer_cleanup_relayd(relayd);
+ }
pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
end:
pthread_mutex_lock(&relayd->ctrl_sock_mutex);
ret = relayd_mkdir(&relayd->control_sock, path);
+ if (ret < 0) {
+ ERR("Relayd mkdir failed. Cleaning up relayd %" PRIu64".", relayd->net_seq_idx);
+ lttng_consumer_cleanup_relayd(relayd);
+ }
pthread_mutex_unlock(&relayd->ctrl_sock_mutex);
end: