X-Git-Url: https://git.lttng.org/?p=lttng-tools.git;a=blobdiff_plain;f=src%2Fbin%2Flttng-relayd%2Flive.c;h=53cc660918d463aacae382c6a2c858281925f26d;hp=03aab2d3c42b2a9a7472d34c024b987f3ab1b47a;hb=efe12cccde8949059bf0e91cb0e63a3dd2c21949;hpb=d3e2ba59faddb31870e2ce29b6a881f7ad5ad883 diff --git a/src/bin/lttng-relayd/live.c b/src/bin/lttng-relayd/live.c index 03aab2d3c..53cc66091 100644 --- a/src/bin/lttng-relayd/live.c +++ b/src/bin/lttng-relayd/live.c @@ -56,6 +56,7 @@ #include "lttng-relayd.h" #include "lttng-viewer.h" #include "utils.h" +#include "health-relayd.h" static struct lttng_uri *live_uri; @@ -96,8 +97,6 @@ void cleanup(void) { DBG("Cleaning up"); - /* Close thread quit pipes */ - utils_close_pipe(live_thread_quit_pipe); free(live_uri); } @@ -107,16 +106,14 @@ void cleanup(void) static int notify_thread_pipe(int wpipe) { - int ret; + ssize_t ret; - do { - ret = write(wpipe, "!", 1); - } while (ret < 0 && errno == EINTR); - if (ret < 0 || ret != 1) { + ret = lttng_write(wpipe, "!", 1); + if (ret < 1) { PERROR("write poll pipe"); } - return ret; + return (int) ret; } /* @@ -139,21 +136,6 @@ void stop_threads(void) futex_nto1_wake(&viewer_cmd_queue.futex); } -/* - * Init thread quit pipe. - * - * Return -1 on error or 0 if all pipes are created. - */ -static -int init_thread_quit_pipe(void) -{ - int ret; - - ret = utils_create_pipe_cloexec(live_thread_quit_pipe); - - return ret; -} - /* * Create a poll set with O_CLOEXEC and add the thread quit pipe to the set. */ @@ -254,6 +236,10 @@ void *thread_listener(void *data) DBG("[thread] Relay live listener started"); + health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_LISTENER); + + health_code_update(); + live_control_sock = init_socket(live_uri); if (!live_control_sock) { goto error_sock_control; @@ -274,10 +260,14 @@ void *thread_listener(void *data) } while (1) { + health_code_update(); + DBG("Listener accepting live viewers connections"); restart: + health_poll_entry(); ret = lttng_poll_wait(&events, -1); + health_poll_exit(); if (ret < 0) { /* * Restart interrupted system call. 
@@ -291,6 +281,8 @@ restart: DBG("Relay new viewer connection received"); for (i = 0; i < nb_fd; i++) { + health_code_update(); + /* Fetch once the poll data */ revents = LTTNG_POLL_GETEV(&events, i); pollfd = LTTNG_POLL_GETFD(&events, i); @@ -365,8 +357,10 @@ error_create_poll: lttcomm_destroy_sock(live_control_sock); error_sock_control: if (err) { + health_error(); DBG("Live viewer listener thread exited with error"); } + health_unregister(health_relayd); DBG("Live viewer listener thread cleanup complete"); stop_threads(); return NULL; @@ -378,17 +372,26 @@ error_sock_control: static void *thread_dispatcher(void *data) { - int ret; + int err = -1; + ssize_t ret; struct cds_wfq_node *node; struct relay_command *relay_cmd = NULL; DBG("[thread] Live viewer relay dispatcher started"); + health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_DISPATCHER); + + health_code_update(); + while (!CMM_LOAD_SHARED(live_dispatch_thread_exit)) { + health_code_update(); + /* Atomically prepare the queue futex */ futex_nto1_prepare(&viewer_cmd_queue.futex); do { + health_code_update(); + /* Dequeue commands */ node = cds_wfq_dequeue_blocking(&viewer_cmd_queue.queue); if (node == NULL) { @@ -407,22 +410,30 @@ void *thread_dispatcher(void *data) * so we can be assured that the data will be read at some point in * time or wait to the end of the world :) */ - do { - ret = write(live_relay_cmd_pipe[1], relay_cmd, - sizeof(*relay_cmd)); - } while (ret < 0 && errno == EINTR); + ret = lttng_write(live_relay_cmd_pipe[1], relay_cmd, + sizeof(*relay_cmd)); free(relay_cmd); - if (ret < 0 || ret != sizeof(struct relay_command)) { + if (ret < sizeof(struct relay_command)) { PERROR("write cmd pipe"); goto error; } } while (node != NULL); /* Futex wait on queue. Blocking call on futex() */ + health_poll_entry(); futex_nto1_wait(&viewer_cmd_queue.futex); + health_poll_exit(); } + /* Normal exit, no error */ + err = 0; + error: + if (err) { + health_error(); + ERR("Health error occurred in %s", __func__); + } + health_unregister(health_relayd); DBG("Live viewer dispatch thread dying"); stop_threads(); return NULL; @@ -443,6 +454,8 @@ int viewer_connect(struct relay_command *cmd) cmd->version_check_done = 1; + health_code_update(); + /* Get version from the other side. 
*/ ret = cmd->sock->ops->recvmsg(cmd->sock, &msg, sizeof(msg), 0); if (ret < 0 || ret != sizeof(msg)) { @@ -456,6 +469,8 @@ int viewer_connect(struct relay_command *cmd) goto end; } + health_code_update(); + reply.major = RELAYD_VERSION_COMM_MAJOR; reply.minor = RELAYD_VERSION_COMM_MINOR; @@ -463,7 +478,7 @@ int viewer_connect(struct relay_command *cmd) if (reply.major != be32toh(msg.major)) { DBG("Incompatible major versions (%u vs %u)", reply.major, be32toh(msg.major)); - ret = 0; + ret = -1; goto end; } @@ -490,12 +505,17 @@ int viewer_connect(struct relay_command *cmd) if (cmd->type == RELAY_VIEWER_COMMAND) { reply.viewer_session_id = htobe64(++last_relay_viewer_session_id); } + + health_code_update(); + ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(struct lttng_viewer_connect), 0); if (ret < 0) { ERR("Relay sending version"); } + health_code_update(); + DBG("Version check done using protocol %u.%u", cmd->major, cmd->minor); ret = 0; @@ -533,6 +553,8 @@ int viewer_list_sessions(struct relay_command *cmd, cds_lfht_count_nodes(sessions_ht->ht, &approx_before, &count, &approx_after); session_list.sessions_count = htobe32(count); + health_code_update(); + ret = cmd->sock->ops->sendmsg(cmd->sock, &session_list, sizeof(session_list), 0); if (ret < 0) { @@ -540,7 +562,11 @@ int viewer_list_sessions(struct relay_command *cmd, goto end_unlock; } + health_code_update(); + cds_lfht_for_each_entry(sessions_ht->ht, &iter.iter, node, node) { + health_code_update(); + node = lttng_ht_iter_get_node_ulong(&iter); if (!node) { goto end_unlock; @@ -554,6 +580,9 @@ int viewer_list_sessions(struct relay_command *cmd, send_session.id = htobe64(session->id); send_session.live_timer = htobe32(session->live_timer); send_session.clients = htobe32(session->viewer_attached); + send_session.streams = htobe32(session->stream_count); + + health_code_update(); ret = cmd->sock->ops->sendmsg(cmd->sock, &send_session, sizeof(send_session), 0); @@ -562,6 +591,8 @@ int viewer_list_sessions(struct relay_command *cmd, goto end_unlock; } } + health_code_update(); + rcu_read_unlock(); ret = 0; goto end; @@ -574,6 +605,67 @@ end_no_session: return ret; } +/* + * Open index file using a given viewer stream. + * + * Return 0 on success or else a negative value. 
+ */ +static int open_index(struct relay_viewer_stream *stream) +{ + int ret; + char fullpath[PATH_MAX]; + struct ctf_packet_index_file_hdr hdr; + + if (stream->tracefile_count > 0) { + ret = snprintf(fullpath, sizeof(fullpath), "%s/" DEFAULT_INDEX_DIR "/%s_%" + PRIu64 DEFAULT_INDEX_FILE_SUFFIX, stream->path_name, + stream->channel_name, stream->tracefile_count_current); + } else { + ret = snprintf(fullpath, sizeof(fullpath), "%s/" DEFAULT_INDEX_DIR "/%s" + DEFAULT_INDEX_FILE_SUFFIX, stream->path_name, + stream->channel_name); + } + if (ret < 0) { + PERROR("snprintf index path"); + goto error; + } + + DBG("Opening index file %s in read only", fullpath); + ret = open(fullpath, O_RDONLY); + if (ret < 0) { + if (errno == ENOENT) { + ret = -ENOENT; + goto error; + } else { + PERROR("opening index in read-only"); + } + goto error; + } + stream->index_read_fd = ret; + DBG("Opening index file %s in read only, (fd: %d)", fullpath, ret); + + ret = lttng_read(stream->index_read_fd, &hdr, sizeof(hdr)); + if (ret < sizeof(hdr)) { + PERROR("Reading index header"); + goto error; + } + if (be32toh(hdr.magic) != CTF_INDEX_MAGIC) { + ERR("Invalid header magic"); + ret = -1; + goto error; + } + if (be32toh(hdr.index_major) != CTF_INDEX_MAJOR || + be32toh(hdr.index_minor) != CTF_INDEX_MINOR) { + ERR("Invalid header version"); + ret = -1; + goto error; + } + ret = 0; + +error: + return ret; +} + /* * Allocate and init a new viewer_stream. * @@ -585,14 +677,12 @@ end_no_session: * Returns 0 on success or a negative value on error. */ static -int init_viewer_stream(struct relay_stream *stream, - struct lttng_ht *viewer_streams_ht) +int init_viewer_stream(struct relay_stream *stream, int seek_last) { int ret; struct relay_viewer_stream *viewer_stream; assert(stream); - assert(viewer_streams_ht); viewer_stream = zmalloc(sizeof(*viewer_stream)); if (!viewer_stream) { @@ -600,40 +690,159 @@ int init_viewer_stream(struct relay_stream *stream, ret = -1; goto error; } - - viewer_stream->read_fd = -1; - viewer_stream->index_read_fd = -1; viewer_stream->session_id = stream->session->id; viewer_stream->stream_handle = stream->stream_handle; viewer_stream->path_name = strndup(stream->path_name, LTTNG_VIEWER_PATH_MAX); viewer_stream->channel_name = strndup(stream->channel_name, LTTNG_VIEWER_NAME_MAX); - viewer_stream->total_index_received = stream->total_index_received; - viewer_stream->tracefile_size = stream->tracefile_size; viewer_stream->tracefile_count = stream->tracefile_count; viewer_stream->metadata_flag = stream->metadata_flag; + viewer_stream->tracefile_count_last = -1ULL; + if (seek_last) { + viewer_stream->tracefile_count_current = + stream->tracefile_count_current; + } else { + viewer_stream->tracefile_count_current = + stream->oldest_tracefile_id; + } + + viewer_stream->ctf_trace = stream->ctf_trace; + if (viewer_stream->metadata_flag) { + viewer_stream->ctf_trace->viewer_metadata_stream = + viewer_stream; + } + uatomic_inc(&viewer_stream->ctf_trace->refcount); + + lttng_ht_node_init_u64(&viewer_stream->stream_n, stream->stream_handle); + lttng_ht_add_unique_u64(viewer_streams_ht, &viewer_stream->stream_n); + + viewer_stream->index_read_fd = -1; + viewer_stream->read_fd = -1; /* * This is to avoid a race between the initialization of this object and * the close of the given stream. If the stream is unable to find this * viewer stream when closing, this copy will at least take the latest * value. + * We also need that for the seek_last. 
*/ viewer_stream->total_index_received = stream->total_index_received; /* - * The deletion of this ctf_trace object is only done in a call RCU of the - * relay stream making it valid as long as we have the read side lock. + * If we never received an index for the current stream, delay + * the opening of the index, otherwise open it right now. */ - viewer_stream->ctf_trace = stream->ctf_trace; - uatomic_inc(&viewer_stream->ctf_trace->refcount); + if (viewer_stream->tracefile_count_current == + stream->tracefile_count_current && + viewer_stream->total_index_received == 0) { + viewer_stream->index_read_fd = -1; + } else { + ret = open_index(viewer_stream); + if (ret < 0) { + goto error; + } + } - lttng_ht_node_init_u64(&viewer_stream->stream_n, stream->stream_handle); - lttng_ht_add_unique_u64(viewer_streams_ht, &viewer_stream->stream_n); + if (seek_last && viewer_stream->index_read_fd > 0) { + ret = lseek(viewer_stream->index_read_fd, + viewer_stream->total_index_received * + sizeof(struct ctf_packet_index), + SEEK_CUR); + if (ret < 0) { + goto error; + } + viewer_stream->last_sent_index = + viewer_stream->total_index_received; + } + + ret = 0; + +error: + return ret; +} + +/* + * Rotate a stream to the next tracefile. + * + * Returns 0 on success, 1 on EOF, a negative value on error. + */ +static +int rotate_viewer_stream(struct relay_viewer_stream *viewer_stream, + struct relay_stream *stream) +{ + int ret; + uint64_t tracefile_id; + + assert(viewer_stream); + + tracefile_id = (viewer_stream->tracefile_count_current + 1) % + viewer_stream->tracefile_count; + /* + * Detect the last tracefile to open. + */ + if (viewer_stream->tracefile_count_last != -1ULL && + viewer_stream->tracefile_count_last == + viewer_stream->tracefile_count_current) { + ret = 1; + goto end; + } + + if (stream) { + pthread_mutex_lock(&stream->viewer_stream_rotation_lock); + } + /* + * The writer and the reader are not working in the same + * tracefile, we can read up to EOF, we don't care about the + * total_index_received. + */ + if (!stream || (stream->tracefile_count_current != tracefile_id)) { + viewer_stream->close_write_flag = 1; + } else { + /* + * We are opening a file that is still open in write, make + * sure we limit our reading to the number of indexes + * received. 
+ */ + viewer_stream->close_write_flag = 0; + if (stream) { + viewer_stream->total_index_received = + stream->total_index_received; + } + } + viewer_stream->tracefile_count_current = tracefile_id; + + ret = close(viewer_stream->index_read_fd); + if (ret < 0) { + PERROR("close index file %d", + viewer_stream->index_read_fd); + } + viewer_stream->index_read_fd = -1; + ret = close(viewer_stream->read_fd); + if (ret < 0) { + PERROR("close tracefile %d", + viewer_stream->read_fd); + } + viewer_stream->read_fd = -1; + + pthread_mutex_lock(&viewer_stream->overwrite_lock); + viewer_stream->abort_flag = 0; + pthread_mutex_unlock(&viewer_stream->overwrite_lock); + + viewer_stream->index_read_fd = -1; + viewer_stream->read_fd = -1; + + if (stream) { + pthread_mutex_unlock(&stream->viewer_stream_rotation_lock); + } + ret = open_index(viewer_stream); + if (ret < 0) { + goto error; + } ret = 0; +end: error: return ret; } @@ -643,10 +852,10 @@ error: */ static int viewer_attach_session(struct relay_command *cmd, - struct lttng_ht *sessions_ht, - struct lttng_ht *viewer_streams_ht) + struct lttng_ht *sessions_ht) { - int ret, send_streams = 0, nb_streams = 0; + int ret, send_streams = 0; + uint32_t nb_streams = 0, nb_streams_ready = 0; struct lttng_viewer_attach_session_request request; struct lttng_viewer_attach_session_response response; struct lttng_viewer_stream send_stream; @@ -656,10 +865,10 @@ int viewer_attach_session(struct relay_command *cmd, struct lttng_ht_node_u64 *node64; struct lttng_ht_iter iter; struct relay_session *session; + int seek_last = 0; assert(cmd); assert(sessions_ht); - assert(viewer_streams_ht); DBG("Attach session received"); @@ -669,6 +878,8 @@ int viewer_attach_session(struct relay_command *cmd, goto end_no_session; } + health_code_update(); + ret = cmd->sock->ops->recvmsg(cmd->sock, &request, sizeof(request), 0); if (ret < 0 || ret != sizeof(request)) { if (ret == 0) { @@ -681,6 +892,8 @@ int viewer_attach_session(struct relay_command *cmd, goto error; } + health_code_update(); + rcu_read_lock(); lttng_ht_lookup(sessions_ht, (void *)((unsigned long) be64toh(request.session_id)), &iter); @@ -693,7 +906,7 @@ int viewer_attach_session(struct relay_command *cmd, } session = caa_container_of(node, struct relay_session, session_n); - if (cmd->session == session) { + if (cmd->session_id == session->id) { /* Same viewer already attached, just send the stream list. */ send_streams = 1; response.status = htobe32(VIEWER_ATTACH_OK); @@ -709,6 +922,7 @@ int viewer_attach_session(struct relay_command *cmd, session->viewer_attached++; send_streams = 1; response.status = htobe32(VIEWER_ATTACH_OK); + cmd->session_id = session->id; cmd->session = session; } @@ -717,7 +931,7 @@ int viewer_attach_session(struct relay_command *cmd, /* Default behaviour. */ break; case VIEWER_SEEK_LAST: - /* TODO */ + seek_last = 1; break; default: ERR("Wrong seek parameter"); @@ -738,6 +952,8 @@ int viewer_attach_session(struct relay_command *cmd, cds_lfht_for_each_entry(relay_streams_ht->ht, &iter.iter, node, node) { struct relay_viewer_stream *vstream; + health_code_update(); + node = lttng_ht_iter_get_node_ulong(&iter); if (!node) { continue; @@ -746,40 +962,47 @@ int viewer_attach_session(struct relay_command *cmd, if (stream->session != cmd->session) { continue; } + nb_streams++; /* - * Don't send streams with no ctf_trace, they are not ready to be - * read. + * Don't send streams with no ctf_trace, they are not + * ready to be read. 
*/ - if (!stream->ctf_trace) { + if (!stream->ctf_trace || !stream->viewer_ready) { continue; } + nb_streams_ready++; - vstream = live_find_viewer_stream_by_id(stream->stream_handle, - viewer_streams_ht); + vstream = live_find_viewer_stream_by_id(stream->stream_handle); if (!vstream) { - ret = init_viewer_stream(stream, viewer_streams_ht); + ret = init_viewer_stream(stream, seek_last); if (ret < 0) { goto end_unlock; } } - nb_streams++; + } + + /* We must have the same amount of existing stream and ready stream. */ + if (nb_streams != nb_streams_ready) { + nb_streams = 0; } response.streams_count = htobe32(nb_streams); } send_reply: + health_code_update(); ret = cmd->sock->ops->sendmsg(cmd->sock, &response, sizeof(response), 0); if (ret < 0) { ERR("Relay sending viewer attach response"); goto end_unlock; } + health_code_update(); /* - * Unknown or busy session, just return gracefully, the viewer knows what + * Unknown or empty session, just return gracefully, the viewer knows what * is happening. */ - if (!send_streams) { + if (!send_streams || !nb_streams) { ret = 0; goto end_unlock; } @@ -787,6 +1010,8 @@ send_reply: /* We should only be there if we have a session to attach to. */ assert(session); cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, node, node) { + health_code_update(); + node64 = lttng_ht_iter_get_node_u64(&iter); if (!node64) { continue; @@ -822,84 +1047,17 @@ error: return ret; } -/* - * Open index file using a given viewer stream. - * - * Return 0 on success or else a negative value. - */ -static int open_index(struct relay_viewer_stream *stream) -{ - int ret; - char fullpath[PATH_MAX]; - struct lttng_packet_index_file_hdr hdr; - - if (stream->tracefile_size > 0) { - /* For now we don't support on-disk ring buffer. */ - ret = -1; - goto end; - } else { - ret = snprintf(fullpath, sizeof(fullpath), "%s/" DEFAULT_INDEX_DIR - "/%s" DEFAULT_INDEX_FILE_SUFFIX, - stream->path_name, stream->channel_name); - if (ret < 0) { - PERROR("snprintf index path"); - goto error; - } - } - - DBG("Opening index file %s in read only", fullpath); - ret = open(fullpath, O_RDONLY); - if (ret < 0) { - if (errno == ENOENT) { - ret = ENOENT; - goto error; - } else { - PERROR("opening index in read-only"); - } - goto error; - } - stream->index_read_fd = ret; - DBG("Opening index file %s in read only, (fd: %d)", fullpath, ret); - - do { - ret = read(stream->index_read_fd, &hdr, sizeof(hdr)); - } while (ret < 0 && errno == EINTR); - if (ret < 0) { - PERROR("Reading index header"); - goto error; - } - if (strncmp(hdr.magic, INDEX_MAGIC, sizeof(hdr.magic)) != 0) { - ERR("Invalid header magic"); - ret = -1; - goto error; - } - if (be32toh(hdr.index_major) != INDEX_MAJOR || - be32toh(hdr.index_minor) != INDEX_MINOR) { - ERR("Invalid header version"); - ret = -1; - goto error; - } - ret = 0; - -error: -end: - return ret; -} - /* * Get viewer stream from stream id. * * RCU read side lock MUST be acquired. 
*/ -struct relay_viewer_stream *live_find_viewer_stream_by_id(uint64_t stream_id, - struct lttng_ht *viewer_streams_ht) +struct relay_viewer_stream *live_find_viewer_stream_by_id(uint64_t stream_id) { struct lttng_ht_node_u64 *node; struct lttng_ht_iter iter; struct relay_viewer_stream *stream = NULL; - assert(viewer_streams_ht); - lttng_ht_lookup(viewer_streams_ht, &stream_id, &iter); node = lttng_ht_iter_get_node_u64(&iter); if (node == NULL) { @@ -912,6 +1070,72 @@ end: return stream; } +static +void deferred_free_viewer_stream(struct rcu_head *head) +{ + struct relay_viewer_stream *stream = + caa_container_of(head, struct relay_viewer_stream, rcu_node); + + free(stream->path_name); + free(stream->channel_name); + free(stream); +} + +static +void delete_viewer_stream(struct relay_viewer_stream *vstream) +{ + int delret; + struct lttng_ht_iter iter; + + iter.iter.node = &vstream->stream_n.node; + delret = lttng_ht_del(viewer_streams_ht, &iter); + assert(!delret); +} + +static +void destroy_viewer_stream(struct relay_viewer_stream *vstream) +{ + unsigned long ret_ref; + int ret; + + assert(vstream); + ret_ref = uatomic_add_return(&vstream->ctf_trace->refcount, -1); + assert(ret_ref >= 0); + + if (vstream->read_fd >= 0) { + ret = close(vstream->read_fd); + if (ret < 0) { + PERROR("close read_fd"); + } + } + if (vstream->index_read_fd >= 0) { + ret = close(vstream->index_read_fd); + if (ret < 0) { + PERROR("close index_read_fd"); + } + } + + /* + * If the only stream left in the HT is the metadata stream, + * we need to remove it because we won't detect a EOF for this + * stream. + */ + if (ret_ref == 1 && vstream->ctf_trace->viewer_metadata_stream) { + destroy_viewer_stream(vstream->ctf_trace->viewer_metadata_stream); + vstream->ctf_trace->metadata_stream = NULL; + DBG("Freeing ctf_trace %" PRIu64, vstream->ctf_trace->id); + /* + * The streaming-side is already closed and we can't receive a new + * stream concurrently at this point (since the session is being + * destroyed), so when we detect the refcount equals 0, we are the + * only owners of the ctf_trace and we can free it ourself. + */ + free(vstream->ctf_trace); + } + + call_rcu(&vstream->rcu_node, deferred_free_viewer_stream); +} + /* * Send the next index for a stream. 
* @@ -919,17 +1143,16 @@ end: */ static int viewer_get_next_index(struct relay_command *cmd, - struct lttng_ht *viewer_streams_ht, struct lttng_ht *sessions_ht) + struct lttng_ht *sessions_ht) { int ret; struct lttng_viewer_get_next_index request_index; struct lttng_viewer_index viewer_index; - struct lttng_packet_index packet_index; + struct ctf_packet_index packet_index; struct relay_viewer_stream *vstream; struct relay_stream *rstream; assert(cmd); - assert(viewer_streams_ht); assert(sessions_ht); DBG("Viewer get next index"); @@ -940,6 +1163,7 @@ int viewer_get_next_index(struct relay_command *cmd, goto end_no_session; } + health_code_update(); ret = cmd->sock->ops->recvmsg(cmd->sock, &request_index, sizeof(request_index), 0); if (ret < 0 || ret != sizeof(request_index)) { @@ -947,10 +1171,10 @@ int viewer_get_next_index(struct relay_command *cmd, ERR("Relay didn't receive the whole packet"); goto end; } + health_code_update(); rcu_read_lock(); - vstream = live_find_viewer_stream_by_id(be64toh(request_index.stream_id), - viewer_streams_ht); + vstream = live_find_viewer_stream_by_id(be64toh(request_index.stream_id)); if (!vstream) { ret = -1; goto end_unlock; @@ -969,7 +1193,7 @@ int viewer_get_next_index(struct relay_command *cmd, /* First time, we open the index file */ if (vstream->index_read_fd < 0) { ret = open_index(vstream); - if (ret == ENOENT) { + if (ret == -ENOENT) { /* * The index is created only when the first data packet arrives, it * might not be ready at the beginning of the session @@ -984,23 +1208,50 @@ int viewer_get_next_index(struct relay_command *cmd, rstream = relay_stream_find_by_id(vstream->stream_handle); if (rstream) { - if (rstream->beacon_ts_end != -1ULL && - vstream->last_sent_index == rstream->total_index_received) { - viewer_index.status = htobe32(VIEWER_INDEX_INACTIVE); - viewer_index.timestamp_end = htobe64(rstream->beacon_ts_end); - goto send_reply; + if (vstream->abort_flag) { + /* Rotate on abort (overwrite). */ + DBG("Viewer rotate because of overwrite"); + ret = rotate_viewer_stream(vstream, rstream); + if (ret < 0) { + goto end_unlock; + } else if (ret == 1) { + viewer_index.status = htobe32(VIEWER_INDEX_HUP); + delete_viewer_stream(vstream); + destroy_viewer_stream(vstream); + goto send_reply; + } } - - if (rstream->total_index_received <= vstream->last_sent_index) { - /* No new index to send, retry later. */ - viewer_index.status = htobe32(VIEWER_INDEX_RETRY); - goto send_reply; + pthread_mutex_lock(&rstream->viewer_stream_rotation_lock); + if (rstream->tracefile_count_current == vstream->tracefile_count_current) { + if (rstream->beacon_ts_end != -1ULL && + vstream->last_sent_index == rstream->total_index_received) { + viewer_index.status = htobe32(VIEWER_INDEX_INACTIVE); + viewer_index.timestamp_end = htobe64(rstream->beacon_ts_end); + pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock); + goto send_reply; + /* + * Reader and writer are working in the same tracefile, so we care + * about the number of index received and sent. Otherwise, we read + * up to EOF. + */ + } else if (rstream->total_index_received <= vstream->last_sent_index + && !vstream->close_write_flag) { + pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock); + /* No new index to send, retry later. 
*/ + viewer_index.status = htobe32(VIEWER_INDEX_RETRY); + goto send_reply; + } } - } else if (!rstream && + pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock); + } else if (!rstream && vstream->close_write_flag && vstream->total_index_received == vstream->last_sent_index) { - /* Last index sent and stream closed */ + /* Last index sent and current tracefile closed in write */ viewer_index.status = htobe32(VIEWER_INDEX_HUP); + delete_viewer_stream(vstream); + destroy_viewer_stream(vstream); goto send_reply; + } else { + vstream->close_write_flag = 1; } if (!vstream->ctf_trace->metadata_received || @@ -1009,13 +1260,50 @@ int viewer_get_next_index(struct relay_command *cmd, viewer_index.flags |= LTTNG_VIEWER_FLAG_NEW_METADATA; } - do { - ret = read(vstream->index_read_fd, &packet_index, - sizeof(packet_index)); - } while (ret < 0 && errno == EINTR); + pthread_mutex_lock(&vstream->overwrite_lock); + if (vstream->abort_flag) { + /* + * The file is being overwritten by the writer, we cannot + * use it. + */ + viewer_index.status = htobe32(VIEWER_INDEX_RETRY); + pthread_mutex_unlock(&vstream->overwrite_lock); + ret = rotate_viewer_stream(vstream, rstream); + if (ret < 0) { + goto end_unlock; + } else if (ret == 1) { + viewer_index.status = htobe32(VIEWER_INDEX_HUP); + delete_viewer_stream(vstream); + destroy_viewer_stream(vstream); + goto send_reply; + } + goto send_reply; + } + ret = lttng_read(vstream->index_read_fd, &packet_index, + sizeof(packet_index)); + pthread_mutex_unlock(&vstream->overwrite_lock); if (ret < sizeof(packet_index)) { - PERROR("Relay reading index file"); - viewer_index.status = htobe32(VIEWER_INDEX_ERR); + /* + * The tracefile is closed in write, so we read up to EOF. + */ + if (vstream->close_write_flag == 1) { + viewer_index.status = htobe32(VIEWER_INDEX_RETRY); + /* Rotate on normal EOF */ + ret = rotate_viewer_stream(vstream, rstream); + if (ret < 0) { + goto end_unlock; + } else if (ret == 1) { + viewer_index.status = htobe32(VIEWER_INDEX_HUP); + delete_viewer_stream(vstream); + destroy_viewer_stream(vstream); + goto send_reply; + } + } else { + PERROR("Relay reading index file %d", + vstream->index_read_fd); + viewer_index.status = htobe32(VIEWER_INDEX_ERR); + } + goto send_reply; } else { viewer_index.status = htobe32(VIEWER_INDEX_OK); vstream->last_sent_index++; @@ -1034,12 +1322,14 @@ int viewer_get_next_index(struct relay_command *cmd, send_reply: viewer_index.flags = htobe32(viewer_index.flags); + health_code_update(); ret = cmd->sock->ops->sendmsg(cmd->sock, &viewer_index, sizeof(viewer_index), 0); if (ret < 0) { ERR("Relay index to viewer"); goto end_unlock; } + health_code_update(); DBG("Index %" PRIu64 "for stream %" PRIu64 "sent", vstream->last_sent_index, vstream->stream_handle); @@ -1058,8 +1348,7 @@ end: * Return 0 on success or else a negative value. 
*/ static -int viewer_get_packet(struct relay_command *cmd, - struct lttng_ht *viewer_streams_ht) +int viewer_get_packet(struct relay_command *cmd) { int ret, send_data = 0; char *data = NULL; @@ -1070,7 +1359,6 @@ int viewer_get_packet(struct relay_command *cmd, struct relay_viewer_stream *stream; assert(cmd); - assert(viewer_streams_ht); DBG2("Relay get data packet"); @@ -1080,6 +1368,7 @@ int viewer_get_packet(struct relay_command *cmd, goto end; } + health_code_update(); ret = cmd->sock->ops->recvmsg(cmd->sock, &get_packet_info, sizeof(get_packet_info), 0); if (ret < 0 || ret != sizeof(get_packet_info)) { @@ -1087,10 +1376,13 @@ int viewer_get_packet(struct relay_command *cmd, ERR("Relay didn't receive the whole packet"); goto end; } + health_code_update(); + + /* From this point on, the error label can be reached. */ + memset(&reply, 0, sizeof(reply)); rcu_read_lock(); - stream = live_find_viewer_stream_by_id(be64toh(get_packet_info.stream_id), - viewer_streams_ht); + stream = live_find_viewer_stream_by_id(be64toh(get_packet_info.stream_id)); if (!stream) { goto error; } @@ -1104,8 +1396,14 @@ int viewer_get_packet(struct relay_command *cmd, if (stream->read_fd < 0) { char fullpath[PATH_MAX]; - ret = snprintf(fullpath, PATH_MAX, "%s/%s", stream->path_name, - stream->channel_name); + if (stream->tracefile_count > 0) { + ret = snprintf(fullpath, PATH_MAX, "%s/%s_%" PRIu64, stream->path_name, + stream->channel_name, + stream->tracefile_count_current); + } else { + ret = snprintf(fullpath, PATH_MAX, "%s/%s", stream->path_name, + stream->channel_name); + } if (ret < 0) { goto error; } @@ -1117,14 +1415,11 @@ int viewer_get_packet(struct relay_command *cmd, stream->read_fd = ret; } - memset(&reply, 0, sizeof(reply)); - if (!stream->ctf_trace->metadata_received || stream->ctf_trace->metadata_received > stream->ctf_trace->metadata_sent) { reply.status = htobe32(VIEWER_GET_PACKET_ERR); reply.flags |= LTTNG_VIEWER_FLAG_NEW_METADATA; - goto send_reply; } @@ -1137,14 +1432,32 @@ int viewer_get_packet(struct relay_command *cmd, ret = lseek(stream->read_fd, be64toh(get_packet_info.offset), SEEK_SET); if (ret < 0) { - PERROR("lseek"); - goto error; + /* + * If the read fd was closed by the streaming side, the + * abort_flag will be set to 1, otherwise it is an error. + */ + if (stream->abort_flag == 0) { + PERROR("lseek"); + goto error; + } + reply.status = htobe32(VIEWER_GET_PACKET_EOF); + goto send_reply; } - read_len = read(stream->read_fd, data, len); - if (read_len < (ssize_t) len) { - PERROR("Relay reading trace file, fd: %d, offset: %" PRIu64, - stream->read_fd, be64toh(get_packet_info.offset)); - goto error; + read_len = lttng_read(stream->read_fd, data, len); + if (read_len < len) { + /* + * If the read fd was closed by the streaming side, the + * abort_flag will be set to 1, otherwise it is an error. 
+ */ + if (stream->abort_flag == 0) { + PERROR("Relay reading trace file, fd: %d, offset: %" PRIu64, + stream->read_fd, + be64toh(get_packet_info.offset)); + goto error; + } else { + reply.status = htobe32(VIEWER_GET_PACKET_EOF); + goto send_reply; + } } reply.status = htobe32(VIEWER_GET_PACKET_OK); reply.len = htobe32(len); @@ -1156,18 +1469,23 @@ error: send_reply: reply.flags = htobe32(reply.flags); + + health_code_update(); ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0); if (ret < 0) { ERR("Relay data header to viewer"); goto end_unlock; } + health_code_update(); if (send_data) { + health_code_update(); ret = cmd->sock->ops->sendmsg(cmd->sock, data, len, 0); if (ret < 0) { ERR("Relay send data to viewer"); goto end_unlock; } + health_code_update(); } DBG("Sent %u bytes for stream %" PRIu64, len, @@ -1187,8 +1505,7 @@ end: * Return 0 on success else a negative value. */ static -int viewer_get_metadata(struct relay_command *cmd, - struct lttng_ht *viewer_streams_ht) +int viewer_get_metadata(struct relay_command *cmd) { int ret = 0; ssize_t read_len; @@ -1199,7 +1516,6 @@ int viewer_get_metadata(struct relay_command *cmd, struct relay_viewer_stream *stream; assert(cmd); - assert(viewer_streams_ht); DBG("Relay get metadata"); @@ -1209,6 +1525,7 @@ int viewer_get_metadata(struct relay_command *cmd, goto end; } + health_code_update(); ret = cmd->sock->ops->recvmsg(cmd->sock, &request, sizeof(request), 0); if (ret < 0 || ret != sizeof(request)) { @@ -1216,10 +1533,10 @@ int viewer_get_metadata(struct relay_command *cmd, ERR("Relay didn't receive the whole packet"); goto end; } + health_code_update(); rcu_read_lock(); - stream = live_find_viewer_stream_by_id(be64toh(request.stream_id), - viewer_streams_ht); + stream = live_find_viewer_stream_by_id(be64toh(request.stream_id)); if (!stream || !stream->metadata_flag) { ERR("Invalid metadata stream"); goto error; @@ -1259,8 +1576,8 @@ int viewer_get_metadata(struct relay_command *cmd, goto error; } - read_len = read(stream->read_fd, data, len); - if (read_len < (ssize_t) len) { + read_len = lttng_read(stream->read_fd, data, len); + if (read_len < len) { PERROR("Relay reading metadata file"); goto error; } @@ -1272,11 +1589,13 @@ error: reply.status = htobe32(VIEWER_METADATA_ERR); send_reply: + health_code_update(); ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0); if (ret < 0) { ERR("Relay data header to viewer"); goto end_unlock; } + health_code_update(); if (len > 0) { ret = cmd->sock->ops->sendmsg(cmd->sock, data, len, 0); @@ -1320,8 +1639,7 @@ void live_relay_unknown_command(struct relay_command *cmd) */ static int process_control(struct lttng_viewer_cmd *recv_hdr, - struct relay_command *cmd, struct lttng_ht *sessions_ht, - struct lttng_ht *viewer_streams_ht) + struct relay_command *cmd, struct lttng_ht *sessions_ht) { int ret = 0; @@ -1333,17 +1651,16 @@ int process_control(struct lttng_viewer_cmd *recv_hdr, ret = viewer_list_sessions(cmd, sessions_ht); break; case VIEWER_ATTACH_SESSION: - ret = viewer_attach_session(cmd, sessions_ht, - viewer_streams_ht); + ret = viewer_attach_session(cmd, sessions_ht); break; case VIEWER_GET_NEXT_INDEX: - ret = viewer_get_next_index(cmd, viewer_streams_ht, sessions_ht); + ret = viewer_get_next_index(cmd, sessions_ht); break; case VIEWER_GET_PACKET: - ret = viewer_get_packet(cmd, viewer_streams_ht); + ret = viewer_get_packet(cmd); break; case VIEWER_GET_METADATA: - ret = viewer_get_metadata(cmd, viewer_streams_ht); + ret = viewer_get_metadata(cmd); break; default: 
ERR("Received unknown viewer command (%u)", be32toh(recv_hdr->cmd)); @@ -1392,10 +1709,8 @@ int add_connection(int fd, struct lttng_poll_event *events, goto error; } - do { - ret = read(fd, relay_connection, sizeof(*relay_connection)); - } while (ret < 0 && errno == EINTR); - if (ret < 0 || ret < sizeof(*relay_connection)) { + ret = lttng_read(fd, relay_connection, sizeof(*relay_connection)); + if (ret < sizeof(*relay_connection)) { PERROR("read relay cmd pipe"); goto error_read; } @@ -1430,68 +1745,38 @@ void deferred_free_connection(struct rcu_head *head) free(relay_connection); } +/* + * Delete all streams for a specific session ID. + */ static -void deferred_free_viewer_stream(struct rcu_head *head) -{ - struct relay_viewer_stream *stream = - caa_container_of(head, struct relay_viewer_stream, rcu_node); - - if (stream->ctf_trace) { - uatomic_dec(&stream->ctf_trace->refcount); - assert(uatomic_read(&stream->ctf_trace->refcount) >= 0); - if (uatomic_read(&stream->ctf_trace->refcount) == 0) { - DBG("Freeing ctf_trace %" PRIu64, stream->ctf_trace->id); - free(stream->ctf_trace); - } - } - - free(stream->path_name); - free(stream->channel_name); - free(stream); -} - -static -void viewer_del_streams(struct lttng_ht *viewer_streams_ht, - struct relay_session *session) +void viewer_del_streams(uint64_t session_id) { - int ret; struct relay_viewer_stream *stream; - struct lttng_ht_node_u64 *node; struct lttng_ht_iter iter; - assert(viewer_streams_ht); - assert(session); - rcu_read_lock(); - cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, node, node) { - node = lttng_ht_iter_get_node_u64(&iter); - if (!node) { - continue; - } + cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, stream, + stream_n.node) { + health_code_update(); - stream = caa_container_of(node, struct relay_viewer_stream, stream_n); - if (stream->session_id != session->id) { + if (stream->session_id != session_id) { continue; } - if (stream->read_fd > 0) { - ret = close(stream->read_fd); - if (ret < 0) { - PERROR("close read_fd"); - } - } - if (stream->index_read_fd > 0) { - ret = close(stream->index_read_fd); - if (ret < 0) { - PERROR("close index_read_fd"); - } - } - if (stream->metadata_flag && stream->ctf_trace) { + delete_viewer_stream(stream); + assert(stream->ctf_trace); + + if (stream->metadata_flag) { + /* + * The metadata viewer stream is destroyed once the refcount on the + * ctf trace goes to 0 in the destroy stream function thus there is + * no explicit call to that function here. 
+ */ stream->ctf_trace->metadata_sent = 0; + stream->ctf_trace->viewer_metadata_stream = NULL; + } else { + destroy_viewer_stream(stream); } - ret = lttng_ht_del(viewer_streams_ht, &iter); - assert(!ret); - call_rcu(&stream->rcu_node, deferred_free_viewer_stream); } rcu_read_unlock(); } @@ -1503,22 +1788,21 @@ void viewer_del_streams(struct lttng_ht *viewer_streams_ht, */ static void del_connection(struct lttng_ht *relay_connections_ht, - struct lttng_ht_iter *iter, struct relay_command *relay_connection, - struct lttng_ht *viewer_streams_ht) + struct lttng_ht_iter *iter, struct relay_command *relay_connection) { int ret; assert(relay_connections_ht); assert(iter); assert(relay_connection); - assert(viewer_streams_ht); + + DBG("Cleaning connection of session ID %" PRIu64, + relay_connection->session_id); ret = lttng_ht_del(relay_connections_ht, iter); assert(!ret); - if (relay_connection->session) { - viewer_del_streams(viewer_streams_ht, relay_connection->session); - } + viewer_del_streams(relay_connection->session_id); call_rcu(&relay_connection->rcu_node, deferred_free_connection); } @@ -1539,12 +1823,13 @@ void *thread_worker(void *data) struct lttng_viewer_cmd recv_hdr; struct relay_local_data *relay_ctx = (struct relay_local_data *) data; struct lttng_ht *sessions_ht = relay_ctx->sessions_ht; - struct lttng_ht *viewer_streams_ht = relay_ctx->viewer_streams_ht; DBG("[thread] Live viewer relay worker started"); rcu_register_thread(); + health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_WORKER); + /* table of connections indexed on socket */ relay_connections_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG); if (!relay_connections_ht) { @@ -1565,9 +1850,13 @@ restart: while (1) { int i; + health_code_update(); + /* Infinite blocking call, waiting for transmission */ DBG3("Relayd live viewer worker thread polling..."); + health_poll_entry(); ret = lttng_poll_wait(&events, -1); + health_poll_exit(); if (ret < 0) { /* * Restart interrupted system call. @@ -1590,6 +1879,8 @@ restart: uint32_t revents = LTTNG_POLL_GETEV(&events, i); int pollfd = LTTNG_POLL_GETFD(&events, i); + health_code_update(); + /* Thread quit pipe has been closed. Killing thread. */ ret = check_thread_quit_pipe(pollfd, revents); if (ret) { @@ -1624,15 +1915,14 @@ restart: sock_n); if (revents & (LPOLLERR)) { - ERR("VIEWER POLL ERROR"); cleanup_poll_connection(&events, pollfd); del_connection(relay_connections_ht, &iter, - relay_connection, viewer_streams_ht); + relay_connection); } else if (revents & (LPOLLHUP | LPOLLRDHUP)) { DBG("Viewer socket %d hung up", pollfd); cleanup_poll_connection(&events, pollfd); del_connection(relay_connections_ht, &iter, - relay_connection, viewer_streams_ht); + relay_connection); } else if (revents & LPOLLIN) { ret = relay_connection->sock->ops->recvmsg( relay_connection->sock, &recv_hdr, @@ -1641,8 +1931,8 @@ restart: /* connection closed */ if (ret <= 0) { cleanup_poll_connection(&events, pollfd); - del_connection( relay_connections_ht, &iter, - relay_connection, viewer_streams_ht); + del_connection(relay_connections_ht, &iter, + relay_connection); DBG("Viewer control connection closed with %d", pollfd); } else { @@ -1652,12 +1942,12 @@ restart: relay_connection->session->id); } ret = process_control(&recv_hdr, relay_connection, - sessions_ht, viewer_streams_ht); + sessions_ht); if (ret < 0) { /* Clear the session on error. 
*/ cleanup_poll_connection(&events, pollfd); del_connection(relay_connections_ht, &iter, - relay_connection, viewer_streams_ht); + relay_connection); DBG("Viewer connection closed with %d", pollfd); } } @@ -1674,6 +1964,8 @@ error: /* empty the hash table and free the memory */ rcu_read_lock(); cds_lfht_for_each_entry(relay_connections_ht->ht, &iter.iter, node, node) { + health_code_update(); + node = lttng_ht_iter_get_node_ulong(&iter); if (!node) { continue; @@ -1681,8 +1973,7 @@ error: relay_connection = caa_container_of(node, struct relay_command, sock_n); - del_connection(relay_connections_ht, &iter, relay_connection, - viewer_streams_ht); + del_connection(relay_connections_ht, &iter, relay_connection); } rcu_read_unlock(); error_poll_create: @@ -1694,6 +1985,11 @@ relay_connections_ht_error: DBG("Viewer worker thread exited with error"); } DBG("Viewer worker thread cleanup complete"); + if (err) { + health_error(); + ERR("Health error occurred in %s", __func__); + } + health_unregister(health_relayd); stop_threads(); rcu_unregister_thread(); return NULL; @@ -1712,7 +2008,7 @@ static int create_relay_cmd_pipe(void) return ret; } -void live_stop_threads() +void live_stop_threads(void) { int ret; void *status; @@ -1747,7 +2043,7 @@ error: * main */ int live_start_threads(struct lttng_uri *uri, - struct relay_local_data *relay_ctx) + struct relay_local_data *relay_ctx, int quit_pipe[2]) { int ret = 0; void *status; @@ -1756,10 +2052,8 @@ int live_start_threads(struct lttng_uri *uri, assert(uri); live_uri = uri; - /* Create thread quit pipe */ - if ((ret = init_thread_quit_pipe()) < 0) { - goto error; - } + live_thread_quit_pipe[0] = quit_pipe[0]; + live_thread_quit_pipe[1] = quit_pipe[1]; /* Check if daemon is UID = 0 */ is_root = !getuid();
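
A recurring change throughout this patch is the replacement of open-coded retry loops such as do { ret = write(...); } while (ret < 0 && errno == EINTR); with the lttng_write()/lttng_read() helpers, whose return value is then compared against the expected byte count. As a rough illustration of the pattern such a helper encapsulates, here is a minimal EINTR-safe write loop. It is a sketch only: the write_all() name is hypothetical, and the real lttng_write() in the lttng-tools common code may differ in details such as partial-write handling and error reporting.

#include <errno.h>
#include <stddef.h>
#include <sys/types.h>
#include <unistd.h>

/*
 * Sketch of an EINTR-safe write loop in the spirit of lttng_write():
 * keep writing until the whole buffer is out, retrying when the call is
 * interrupted by a signal, and return the number of bytes written or -1
 * on a genuine I/O error.
 */
static ssize_t write_all(int fd, const void *buf, size_t count)
{
	size_t written = 0;

	while (written < count) {
		ssize_t ret = write(fd, (const char *) buf + written,
				count - written);

		if (ret < 0) {
			if (errno == EINTR) {
				continue;	/* Interrupted, retry. */
			}
			return -1;		/* Real error. */
		}
		written += (size_t) ret;
	}
	return (ssize_t) written;
}

This is why call sites in the diff only check the returned length, for example ret < sizeof(struct relay_command) in thread_dispatcher(), instead of re-testing errno after every write.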
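
The new delete_viewer_stream()/destroy_viewer_stream() pair and the deferred_free_viewer_stream() callback follow the usual userspace-RCU deferred-reclamation pattern: unlink the object from the RCU-protected hash table first, then hand the memory to call_rcu() so it is only freed once every pre-existing reader has left its read-side critical section. A minimal sketch of that pattern with liburcu follows; the struct and function names are illustrative (not taken from the relay daemon), and the exact headers providing call_rcu() and caa_container_of() can vary between liburcu versions.

#include <stdlib.h>
#include <urcu.h>		/* call_rcu(); header layout varies by liburcu version */
#include <urcu/compiler.h>	/* caa_container_of() */

struct viewer_obj {
	int fd;
	struct rcu_head rcu_node;	/* Embedded head passed to call_rcu(). */
};

/* Runs after a grace period: no reader can still hold a reference. */
static void deferred_free_obj(struct rcu_head *head)
{
	struct viewer_obj *obj =
		caa_container_of(head, struct viewer_obj, rcu_node);

	free(obj);
}

static void remove_obj(struct viewer_obj *obj)
{
	/*
	 * The caller is assumed to have already unlinked obj from the
	 * RCU-protected hash table (e.g. via lttng_ht_del()), so no new
	 * reader can find it.  The free is deferred because readers that
	 * looked it up before the unlink may still be dereferencing it
	 * under rcu_read_lock().
	 */
	call_rcu(&obj->rcu_node, deferred_free_obj);
}

In the patch itself, delete_viewer_stream() performs the unlink (lttng_ht_del()), while destroy_viewer_stream() closes the stream's file descriptors, drops the ctf_trace refcount and finally queues deferred_free_viewer_stream() through call_rcu().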