X-Git-Url: https://git.lttng.org/?p=lttng-tools.git;a=blobdiff_plain;f=src%2Fbin%2Flttng-relayd%2Flive.c;h=1a691a849c11e972fee17059df4d5c7068846f01;hp=480c459ce6a49724f54d1b6ef71c0b5a6d458a1b;hb=4a9daf1745ccbd2aab029206a664f39fcbd640ce;hpb=cef0f7d51b8025d3ba04e6496242c1cca1641aa6 diff --git a/src/bin/lttng-relayd/live.c b/src/bin/lttng-relayd/live.c index 480c459ce..1a691a849 100644 --- a/src/bin/lttng-relayd/live.c +++ b/src/bin/lttng-relayd/live.c @@ -57,15 +57,10 @@ #include "lttng-viewer.h" #include "utils.h" #include "health-relayd.h" +#include "testpoint.h" static struct lttng_uri *live_uri; -/* - * Quit pipe for all threads. This permits a single cancellation point - * for all threads when receiving an event on the pipe. - */ -static int live_thread_quit_pipe[2] = { -1, -1 }; - /* * This pipe is used to inform the worker thread that a command is queued and * ready to be processed. @@ -126,7 +121,7 @@ void stop_threads(void) /* Stopping all threads */ DBG("Terminating all live threads"); - ret = notify_thread_pipe(live_thread_quit_pipe[1]); + ret = notify_thread_pipe(thread_quit_pipe[1]); if (ret < 0) { ERR("write error on thread quit pipe"); } @@ -155,7 +150,7 @@ int create_thread_poll_set(struct lttng_poll_event *events, int size) } /* Add quit pipe */ - ret = lttng_poll_add(events, live_thread_quit_pipe[0], LPOLLIN); + ret = lttng_poll_add(events, thread_quit_pipe[0], LPOLLIN | LPOLLERR); if (ret < 0) { goto error; } @@ -174,7 +169,7 @@ error: static int check_thread_quit_pipe(int fd, uint32_t events) { - if (fd == live_thread_quit_pipe[0] && (events & LPOLLIN)) { + if (fd == thread_quit_pipe[0] && (events & LPOLLIN)) { return 1; } @@ -245,9 +240,7 @@ void *thread_listener(void *data) goto error_sock_control; } - /* - * Pass 3 as size here for the thread quit pipe, control and data socket. - */ + /* Pass 2 as size here for the thread quit pipe and control sockets. */ ret = create_thread_poll_set(&events, 2); if (ret < 0) { goto error_create_poll; @@ -259,6 +252,12 @@ void *thread_listener(void *data) goto error_poll_add; } + lttng_relay_notify_ready(); + + if (testpoint(relayd_thread_live_listener)) { + goto error_testpoint; + } + while (1) { health_code_update(); @@ -346,6 +345,7 @@ restart: exit: error: error_poll_add: +error_testpoint: lttng_poll_clean(&events); error_create_poll: if (live_control_sock->fd >= 0) { @@ -381,6 +381,10 @@ void *thread_dispatcher(void *data) health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_DISPATCHER); + if (testpoint(relayd_thread_live_dispatcher)) { + goto error_testpoint; + } + health_code_update(); while (!CMM_LOAD_SHARED(live_dispatch_thread_exit)) { @@ -429,6 +433,7 @@ void *thread_dispatcher(void *data) err = 0; error: +error_testpoint: if (err) { health_error(); ERR("Health error occurred in %s", __func__); @@ -478,7 +483,7 @@ int viewer_connect(struct relay_command *cmd) if (reply.major != be32toh(msg.major)) { DBG("Incompatible major versions (%u vs %u)", reply.major, be32toh(msg.major)); - ret = 0; + ret = -1; goto end; } @@ -614,7 +619,7 @@ static int open_index(struct relay_viewer_stream *stream) { int ret; char fullpath[PATH_MAX]; - struct lttng_packet_index_file_hdr hdr; + struct ctf_packet_index_file_hdr hdr; if (stream->tracefile_count > 0) { ret = snprintf(fullpath, sizeof(fullpath), "%s/" DEFAULT_INDEX_DIR "/%s_%" @@ -649,13 +654,13 @@ static int open_index(struct relay_viewer_stream *stream) PERROR("Reading index header"); goto error; } - if (strncmp(hdr.magic, INDEX_MAGIC, sizeof(hdr.magic)) != 0) { + if (be32toh(hdr.magic) != CTF_INDEX_MAGIC) { ERR("Invalid header magic"); ret = -1; goto error; } - if (be32toh(hdr.index_major) != INDEX_MAJOR || - be32toh(hdr.index_minor) != INDEX_MINOR) { + if (be32toh(hdr.index_major) != CTF_INDEX_MAJOR || + be32toh(hdr.index_minor) != CTF_INDEX_MINOR) { ERR("Invalid header version"); ret = -1; goto error; @@ -698,6 +703,7 @@ int init_viewer_stream(struct relay_stream *stream, int seek_last) LTTNG_VIEWER_NAME_MAX); viewer_stream->tracefile_count = stream->tracefile_count; viewer_stream->metadata_flag = stream->metadata_flag; + viewer_stream->tracefile_count_last = -1ULL; if (seek_last) { viewer_stream->tracefile_count_current = stream->tracefile_count_current; @@ -706,11 +712,11 @@ int init_viewer_stream(struct relay_stream *stream, int seek_last) stream->oldest_tracefile_id; } - /* - * The deletion of this ctf_trace object is only done in a call RCU of the - * relay stream making it valid as long as we have the read side lock. - */ viewer_stream->ctf_trace = stream->ctf_trace; + if (viewer_stream->metadata_flag) { + viewer_stream->ctf_trace->viewer_metadata_stream = + viewer_stream; + } uatomic_inc(&viewer_stream->ctf_trace->refcount); lttng_ht_node_init_u64(&viewer_stream->stream_n, stream->stream_handle); @@ -746,7 +752,7 @@ int init_viewer_stream(struct relay_stream *stream, int seek_last) if (seek_last && viewer_stream->index_read_fd > 0) { ret = lseek(viewer_stream->index_read_fd, viewer_stream->total_index_received * - sizeof(struct lttng_packet_index), + sizeof(struct ctf_packet_index), SEEK_CUR); if (ret < 0) { goto error; @@ -764,7 +770,7 @@ error: /* * Rotate a stream to the next tracefile. * - * Returns 0 on success, a negative value on error. + * Returns 0 on success, 1 on EOF, a negative value on error. */ static int rotate_viewer_stream(struct relay_viewer_stream *viewer_stream, @@ -777,6 +783,15 @@ int rotate_viewer_stream(struct relay_viewer_stream *viewer_stream, tracefile_id = (viewer_stream->tracefile_count_current + 1) % viewer_stream->tracefile_count; + /* + * Detect the last tracefile to open. + */ + if (viewer_stream->tracefile_count_last != -1ULL && + viewer_stream->tracefile_count_last == + viewer_stream->tracefile_count_current) { + ret = 1; + goto end; + } if (stream) { pthread_mutex_lock(&stream->viewer_stream_rotation_lock); @@ -802,26 +817,22 @@ int rotate_viewer_stream(struct relay_viewer_stream *viewer_stream, } viewer_stream->tracefile_count_current = tracefile_id; - if (viewer_stream->abort_flag == 0) { - if (viewer_stream->index_read_fd > 0) { - ret = close(viewer_stream->index_read_fd); - if (ret < 0) { - PERROR("close index file %d", - viewer_stream->index_read_fd); - } - viewer_stream->index_read_fd = -1; - } - if (viewer_stream->read_fd > 0) { - ret = close(viewer_stream->read_fd); - if (ret < 0) { - PERROR("close tracefile %d", - viewer_stream->read_fd); - } - viewer_stream->read_fd = -1; - } - } else { - viewer_stream->abort_flag = 0; + ret = close(viewer_stream->index_read_fd); + if (ret < 0) { + PERROR("close index file %d", + viewer_stream->index_read_fd); } + viewer_stream->index_read_fd = -1; + ret = close(viewer_stream->read_fd); + if (ret < 0) { + PERROR("close tracefile %d", + viewer_stream->read_fd); + } + viewer_stream->read_fd = -1; + + pthread_mutex_lock(&viewer_stream->overwrite_lock); + viewer_stream->abort_flag = 0; + pthread_mutex_unlock(&viewer_stream->overwrite_lock); viewer_stream->index_read_fd = -1; viewer_stream->read_fd = -1; @@ -836,6 +847,7 @@ int rotate_viewer_stream(struct relay_viewer_stream *viewer_stream, ret = 0; +end: error: return ret; } @@ -847,7 +859,8 @@ static int viewer_attach_session(struct relay_command *cmd, struct lttng_ht *sessions_ht) { - int ret, send_streams = 0, nb_streams = 0; + int ret, send_streams = 0; + uint32_t nb_streams = 0, nb_streams_ready = 0; struct lttng_viewer_attach_session_request request; struct lttng_viewer_attach_session_response response; struct lttng_viewer_stream send_stream; @@ -954,14 +967,16 @@ int viewer_attach_session(struct relay_command *cmd, if (stream->session != cmd->session) { continue; } + nb_streams++; /* - * Don't send streams with no ctf_trace, they are not ready to be - * read. + * Don't send streams with no ctf_trace, they are not + * ready to be read. */ - if (!stream->ctf_trace) { + if (!stream->ctf_trace || !stream->viewer_ready) { continue; } + nb_streams_ready++; vstream = live_find_viewer_stream_by_id(stream->stream_handle); if (!vstream) { @@ -970,7 +985,11 @@ int viewer_attach_session(struct relay_command *cmd, goto end_unlock; } } - nb_streams++; + } + + /* We must have the same amount of existing stream and ready stream. */ + if (nb_streams != nb_streams_ready) { + nb_streams = 0; } response.streams_count = htobe32(nb_streams); } @@ -985,10 +1004,10 @@ send_reply: health_code_update(); /* - * Unknown or busy session, just return gracefully, the viewer knows what + * Unknown or empty session, just return gracefully, the viewer knows what * is happening. */ - if (!send_streams) { + if (!send_streams || !nb_streams) { ret = 0; goto end_unlock; } @@ -1056,6 +1075,107 @@ end: return stream; } +static +void deferred_free_viewer_stream(struct rcu_head *head) +{ + struct relay_viewer_stream *stream = + caa_container_of(head, struct relay_viewer_stream, rcu_node); + + free(stream->path_name); + free(stream->channel_name); + free(stream); +} + +static +void delete_viewer_stream(struct relay_viewer_stream *vstream) +{ + int delret; + struct lttng_ht_iter iter; + + iter.iter.node = &vstream->stream_n.node; + delret = lttng_ht_del(viewer_streams_ht, &iter); + assert(!delret); +} + +static +void destroy_viewer_stream(struct relay_viewer_stream *vstream) +{ + unsigned long ret_ref; + int ret; + + assert(vstream); + ret_ref = uatomic_add_return(&vstream->ctf_trace->refcount, -1); + assert(ret_ref >= 0); + + if (vstream->read_fd >= 0) { + ret = close(vstream->read_fd); + if (ret < 0) { + PERROR("close read_fd"); + } + } + if (vstream->index_read_fd >= 0) { + ret = close(vstream->index_read_fd); + if (ret < 0) { + PERROR("close index_read_fd"); + } + } + + /* + * If the only stream left in the HT is the metadata stream, + * we need to remove it because we won't detect a EOF for this + * stream. + */ + if (ret_ref == 1 && vstream->ctf_trace->viewer_metadata_stream) { + delete_viewer_stream(vstream->ctf_trace->viewer_metadata_stream); + destroy_viewer_stream(vstream->ctf_trace->viewer_metadata_stream); + vstream->ctf_trace->metadata_stream = NULL; + DBG("Freeing ctf_trace %" PRIu64, vstream->ctf_trace->id); + /* + * The streaming-side is already closed and we can't receive a new + * stream concurrently at this point (since the session is being + * destroyed), so when we detect the refcount equals 0, we are the + * only owners of the ctf_trace and we can free it ourself. + */ + free(vstream->ctf_trace); + } + + call_rcu(&vstream->rcu_node, deferred_free_viewer_stream); +} + +/* + * Atomically check if new streams got added in the session since the last + * check and reset the flag to 0. + * + * Returns 1 if new streams got added, 0 if nothing changed, a negative value + * on error. + */ +static +int check_new_streams(uint64_t session_id, struct lttng_ht *sessions_ht) +{ + struct lttng_ht_node_ulong *node; + struct lttng_ht_iter iter; + struct relay_session *session; + unsigned long current_val; + int ret; + + lttng_ht_lookup(sessions_ht, + (void *)((unsigned long) session_id), &iter); + node = lttng_ht_iter_get_node_ulong(&iter); + if (node == NULL) { + DBG("Relay session %" PRIu64 " not found", session_id); + ret = -1; + goto error; + } + + session = caa_container_of(node, struct relay_session, session_n); + + current_val = uatomic_cmpxchg(&session->new_streams, 1, 0); + ret = current_val; + +error: + return ret; +} + /* * Send the next index for a stream. * @@ -1068,7 +1188,7 @@ int viewer_get_next_index(struct relay_command *cmd, int ret; struct lttng_viewer_get_next_index request_index; struct lttng_viewer_index viewer_index; - struct lttng_packet_index packet_index; + struct ctf_packet_index packet_index; struct relay_viewer_stream *vstream; struct relay_stream *rstream; @@ -1134,6 +1254,11 @@ int viewer_get_next_index(struct relay_command *cmd, ret = rotate_viewer_stream(vstream, rstream); if (ret < 0) { goto end_unlock; + } else if (ret == 1) { + viewer_index.status = htobe32(VIEWER_INDEX_HUP); + delete_viewer_stream(vstream); + destroy_viewer_stream(vstream); + goto send_reply; } } pthread_mutex_lock(&rstream->viewer_stream_rotation_lock); @@ -1162,6 +1287,8 @@ int viewer_get_next_index(struct relay_command *cmd, vstream->total_index_received == vstream->last_sent_index) { /* Last index sent and current tracefile closed in write */ viewer_index.status = htobe32(VIEWER_INDEX_HUP); + delete_viewer_stream(vstream); + destroy_viewer_stream(vstream); goto send_reply; } else { vstream->close_write_flag = 1; @@ -1173,6 +1300,13 @@ int viewer_get_next_index(struct relay_command *cmd, viewer_index.flags |= LTTNG_VIEWER_FLAG_NEW_METADATA; } + ret = check_new_streams(vstream->session_id, sessions_ht); + if (ret < 0) { + goto end_unlock; + } else if (ret == 1) { + viewer_index.flags |= LTTNG_VIEWER_FLAG_NEW_STREAM; + } + pthread_mutex_lock(&vstream->overwrite_lock); if (vstream->abort_flag) { /* @@ -1184,6 +1318,11 @@ int viewer_get_next_index(struct relay_command *cmd, ret = rotate_viewer_stream(vstream, rstream); if (ret < 0) { goto end_unlock; + } else if (ret == 1) { + viewer_index.status = htobe32(VIEWER_INDEX_HUP); + delete_viewer_stream(vstream); + destroy_viewer_stream(vstream); + goto send_reply; } goto send_reply; } @@ -1200,6 +1339,11 @@ int viewer_get_next_index(struct relay_command *cmd, ret = rotate_viewer_stream(vstream, rstream); if (ret < 0) { goto end_unlock; + } else if (ret == 1) { + viewer_index.status = htobe32(VIEWER_INDEX_HUP); + delete_viewer_stream(vstream); + destroy_viewer_stream(vstream); + goto send_reply; } } else { PERROR("Relay reading index file %d", @@ -1251,7 +1395,8 @@ end: * Return 0 on success or else a negative value. */ static -int viewer_get_packet(struct relay_command *cmd) +int viewer_get_packet(struct relay_command *cmd, + struct lttng_ht *sessions_ht) { int ret, send_data = 0; char *data = NULL; @@ -1326,6 +1471,15 @@ int viewer_get_packet(struct relay_command *cmd) goto send_reply; } + ret = check_new_streams(stream->session_id, sessions_ht); + if (ret < 0) { + goto end_unlock; + } else if (ret == 1) { + reply.status = htobe32(VIEWER_GET_PACKET_ERR); + reply.flags |= LTTNG_VIEWER_FLAG_NEW_STREAM; + goto send_reply; + } + len = be32toh(get_packet_info.len); data = zmalloc(len); if (!data) { @@ -1560,7 +1714,7 @@ int process_control(struct lttng_viewer_cmd *recv_hdr, ret = viewer_get_next_index(cmd, sessions_ht); break; case VIEWER_GET_PACKET: - ret = viewer_get_packet(cmd); + ret = viewer_get_packet(cmd, sessions_ht); break; case VIEWER_GET_METADATA: ret = viewer_get_metadata(cmd); @@ -1648,66 +1802,38 @@ void deferred_free_connection(struct rcu_head *head) free(relay_connection); } -static -void deferred_free_viewer_stream(struct rcu_head *head) -{ - struct relay_viewer_stream *stream = - caa_container_of(head, struct relay_viewer_stream, rcu_node); - - if (stream->ctf_trace) { - uatomic_dec(&stream->ctf_trace->refcount); - assert(uatomic_read(&stream->ctf_trace->refcount) >= 0); - if (uatomic_read(&stream->ctf_trace->refcount) == 0) { - DBG("Freeing ctf_trace %" PRIu64, stream->ctf_trace->id); - free(stream->ctf_trace); - } - } - - free(stream->path_name); - free(stream->channel_name); - free(stream); -} - +/* + * Delete all streams for a specific session ID. + */ static void viewer_del_streams(uint64_t session_id) { - int ret; struct relay_viewer_stream *stream; - struct lttng_ht_node_u64 *node; struct lttng_ht_iter iter; rcu_read_lock(); - cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, node, node) { + cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, stream, + stream_n.node) { health_code_update(); - node = lttng_ht_iter_get_node_u64(&iter); - if (!node) { - continue; - } - - stream = caa_container_of(node, struct relay_viewer_stream, stream_n); if (stream->session_id != session_id) { continue; } - if (stream->read_fd >= 0) { - ret = close(stream->read_fd); - if (ret < 0) { - PERROR("close read_fd"); - } - } - if (stream->index_read_fd >= 0) { - ret = close(stream->index_read_fd); - if (ret < 0) { - PERROR("close index_read_fd"); - } - } - if (stream->metadata_flag && stream->ctf_trace) { + delete_viewer_stream(stream); + assert(stream->ctf_trace); + + if (stream->metadata_flag) { + /* + * The metadata viewer stream is destroyed once the refcount on the + * ctf trace goes to 0 in the destroy stream function thus there is + * no explicit call to that function here. + */ stream->ctf_trace->metadata_sent = 0; + stream->ctf_trace->viewer_metadata_stream = NULL; + } else { + destroy_viewer_stream(stream); } - ret = lttng_ht_del(viewer_streams_ht, &iter); - assert(!ret); - call_rcu(&stream->rcu_node, deferred_free_viewer_stream); } rcu_read_unlock(); } @@ -1727,6 +1853,9 @@ void del_connection(struct lttng_ht *relay_connections_ht, assert(iter); assert(relay_connection); + DBG("Cleaning connection of session ID %" PRIu64, + relay_connection->session_id); + ret = lttng_ht_del(relay_connections_ht, iter); assert(!ret); @@ -1758,6 +1887,10 @@ void *thread_worker(void *data) health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_WORKER); + if (testpoint(relayd_thread_live_worker)) { + goto error_testpoint; + } + /* table of connections indexed on socket */ relay_connections_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG); if (!relay_connections_ht) { @@ -1913,6 +2046,7 @@ relay_connections_ht_error: DBG("Viewer worker thread exited with error"); } DBG("Viewer worker thread cleanup complete"); +error_testpoint: if (err) { health_error(); ERR("Health error occurred in %s", __func__); @@ -1971,7 +2105,7 @@ error: * main */ int live_start_threads(struct lttng_uri *uri, - struct relay_local_data *relay_ctx, int quit_pipe[2]) + struct relay_local_data *relay_ctx) { int ret = 0; void *status; @@ -1980,9 +2114,6 @@ int live_start_threads(struct lttng_uri *uri, assert(uri); live_uri = uri; - live_thread_quit_pipe[0] = quit_pipe[0]; - live_thread_quit_pipe[1] = quit_pipe[1]; - /* Check if daemon is UID = 0 */ is_root = !getuid();