Fix: wrong check before destroying the viewer metadata stream
[lttng-tools.git] / src / bin / lttng-relayd / live.c
index cbf32790c88f90d3f7641e05ef1467dd9c76749e..53cc660918d463aacae382c6a2c858281925f26d 100644 (file)
@@ -56,6 +56,7 @@
 #include "lttng-relayd.h"
 #include "lttng-viewer.h"
 #include "utils.h"
+#include "health-relayd.h"
 
 static struct lttng_uri *live_uri;
 
@@ -96,8 +97,6 @@ void cleanup(void)
 {
        DBG("Cleaning up");
 
-       /* Close thread quit pipes */
-       utils_close_pipe(live_thread_quit_pipe);
        free(live_uri);
 }
 
@@ -107,16 +106,14 @@ void cleanup(void)
 static
 int notify_thread_pipe(int wpipe)
 {
-       int ret;
+       ssize_t ret;
 
-       do {
-               ret = write(wpipe, "!", 1);
-       } while (ret < 0 && errno == EINTR);
-       if (ret < 0 || ret != 1) {
+       ret = lttng_write(wpipe, "!", 1);
+       if (ret < 1) {
                PERROR("write poll pipe");
        }
 
-       return ret;
+       return (int) ret;
 }
 
 /*
@@ -139,21 +136,6 @@ void stop_threads(void)
        futex_nto1_wake(&viewer_cmd_queue.futex);
 }
 
-/*
- * Init thread quit pipe.
- *
- * Return -1 on error or 0 if all pipes are created.
- */
-static
-int init_thread_quit_pipe(void)
-{
-       int ret;
-
-       ret = utils_create_pipe_cloexec(live_thread_quit_pipe);
-
-       return ret;
-}
-
 /*
  * Create a poll set with O_CLOEXEC and add the thread quit pipe to the set.
  */
@@ -254,6 +236,10 @@ void *thread_listener(void *data)
 
        DBG("[thread] Relay live listener started");
 
+       health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_LISTENER);
+
+       health_code_update();
+
        live_control_sock = init_socket(live_uri);
        if (!live_control_sock) {
                goto error_sock_control;
@@ -274,10 +260,14 @@ void *thread_listener(void *data)
        }
 
        while (1) {
+               health_code_update();
+
                DBG("Listener accepting live viewers connections");
 
 restart:
+               health_poll_entry();
                ret = lttng_poll_wait(&events, -1);
+               health_poll_exit();
                if (ret < 0) {
                        /*
                         * Restart interrupted system call.
@@ -291,6 +281,8 @@ restart:
 
                DBG("Relay new viewer connection received");
                for (i = 0; i < nb_fd; i++) {
+                       health_code_update();
+
                        /* Fetch once the poll data */
                        revents = LTTNG_POLL_GETEV(&events, i);
                        pollfd = LTTNG_POLL_GETFD(&events, i);
@@ -365,8 +357,10 @@ error_create_poll:
        lttcomm_destroy_sock(live_control_sock);
 error_sock_control:
        if (err) {
+               health_error();
                DBG("Live viewer listener thread exited with error");
        }
+       health_unregister(health_relayd);
        DBG("Live viewer listener thread cleanup complete");
        stop_threads();
        return NULL;
@@ -378,17 +372,26 @@ error_sock_control:
 static
 void *thread_dispatcher(void *data)
 {
-       int ret;
+       int err = -1;
+       ssize_t ret;
        struct cds_wfq_node *node;
        struct relay_command *relay_cmd = NULL;
 
        DBG("[thread] Live viewer relay dispatcher started");
 
+       health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_DISPATCHER);
+
+       health_code_update();
+
        while (!CMM_LOAD_SHARED(live_dispatch_thread_exit)) {
+               health_code_update();
+
                /* Atomically prepare the queue futex */
                futex_nto1_prepare(&viewer_cmd_queue.futex);
 
                do {
+                       health_code_update();
+
                        /* Dequeue commands */
                        node = cds_wfq_dequeue_blocking(&viewer_cmd_queue.queue);
                        if (node == NULL) {
@@ -407,22 +410,30 @@ void *thread_dispatcher(void *data)
                         * so we can be assured that the data will be read at some point in
                         * time or wait to the end of the world :)
                         */
-                       do {
-                               ret = write(live_relay_cmd_pipe[1], relay_cmd,
-                                               sizeof(*relay_cmd));
-                       } while (ret < 0 && errno == EINTR);
+                       ret = lttng_write(live_relay_cmd_pipe[1], relay_cmd,
+                                       sizeof(*relay_cmd));
                        free(relay_cmd);
-                       if (ret < 0 || ret != sizeof(struct relay_command)) {
+                       if (ret < sizeof(struct relay_command)) {
                                PERROR("write cmd pipe");
                                goto error;
                        }
                } while (node != NULL);
 
                /* Futex wait on queue. Blocking call on futex() */
+               health_poll_entry();
                futex_nto1_wait(&viewer_cmd_queue.futex);
+               health_poll_exit();
        }
 
+       /* Normal exit, no error */
+       err = 0;
+
 error:
+       if (err) {
+               health_error();
+               ERR("Health error occurred in %s", __func__);
+       }
+       health_unregister(health_relayd);
        DBG("Live viewer dispatch thread dying");
        stop_threads();
        return NULL;
@@ -443,6 +454,8 @@ int viewer_connect(struct relay_command *cmd)
 
        cmd->version_check_done = 1;
 
+       health_code_update();
+
        /* Get version from the other side. */
        ret = cmd->sock->ops->recvmsg(cmd->sock, &msg, sizeof(msg), 0);
        if (ret < 0 || ret != sizeof(msg)) {
@@ -456,6 +469,8 @@ int viewer_connect(struct relay_command *cmd)
                goto end;
        }
 
+       health_code_update();
+
        reply.major = RELAYD_VERSION_COMM_MAJOR;
        reply.minor = RELAYD_VERSION_COMM_MINOR;
 
@@ -463,7 +478,7 @@ int viewer_connect(struct relay_command *cmd)
        if (reply.major != be32toh(msg.major)) {
                DBG("Incompatible major versions (%u vs %u)", reply.major,
                                be32toh(msg.major));
-               ret = 0;
+               ret = -1;
                goto end;
        }
 
@@ -490,12 +505,17 @@ int viewer_connect(struct relay_command *cmd)
        if (cmd->type == RELAY_VIEWER_COMMAND) {
                reply.viewer_session_id = htobe64(++last_relay_viewer_session_id);
        }
+
+       health_code_update();
+
        ret = cmd->sock->ops->sendmsg(cmd->sock, &reply,
                        sizeof(struct lttng_viewer_connect), 0);
        if (ret < 0) {
                ERR("Relay sending version");
        }
 
+       health_code_update();
+
        DBG("Version check done using protocol %u.%u", cmd->major, cmd->minor);
        ret = 0;
 
@@ -533,6 +553,8 @@ int viewer_list_sessions(struct relay_command *cmd,
        cds_lfht_count_nodes(sessions_ht->ht, &approx_before, &count, &approx_after);
        session_list.sessions_count = htobe32(count);
 
+       health_code_update();
+
        ret = cmd->sock->ops->sendmsg(cmd->sock, &session_list,
                        sizeof(session_list), 0);
        if (ret < 0) {
@@ -540,7 +562,11 @@ int viewer_list_sessions(struct relay_command *cmd,
                goto end_unlock;
        }
 
+       health_code_update();
+
        cds_lfht_for_each_entry(sessions_ht->ht, &iter.iter, node, node) {
+               health_code_update();
+
                node = lttng_ht_iter_get_node_ulong(&iter);
                if (!node) {
                        goto end_unlock;
@@ -554,6 +580,9 @@ int viewer_list_sessions(struct relay_command *cmd,
                send_session.id = htobe64(session->id);
                send_session.live_timer = htobe32(session->live_timer);
                send_session.clients = htobe32(session->viewer_attached);
+               send_session.streams = htobe32(session->stream_count);
+
+               health_code_update();
 
                ret = cmd->sock->ops->sendmsg(cmd->sock, &send_session,
                                sizeof(send_session), 0);
@@ -562,6 +591,8 @@ int viewer_list_sessions(struct relay_command *cmd,
                        goto end_unlock;
                }
        }
+       health_code_update();
+
        rcu_read_unlock();
        ret = 0;
        goto end;
@@ -574,6 +605,67 @@ end_no_session:
        return ret;
 }
 
+/*
+ * Open the index file of a viewer stream, store its fd in index_read_fd and
+ * validate the file header (magic and version).
+ * Return 0 on success, -ENOENT if the file is missing, else a negative value.
+ */
+static int open_index(struct relay_viewer_stream *stream)
+{
+       int ret;
+       char fullpath[PATH_MAX];
+       struct ctf_packet_index_file_hdr hdr;
+
+       if (stream->tracefile_count > 0) {
+               ret = snprintf(fullpath, sizeof(fullpath), "%s/" DEFAULT_INDEX_DIR "/%s_%"
+                               PRIu64 DEFAULT_INDEX_FILE_SUFFIX, stream->path_name,
+                               stream->channel_name, stream->tracefile_count_current);
+       } else {
+               ret = snprintf(fullpath, sizeof(fullpath), "%s/" DEFAULT_INDEX_DIR "/%s"
+                               DEFAULT_INDEX_FILE_SUFFIX, stream->path_name,
+                               stream->channel_name);
+       }
+       if (ret < 0) {
+               PERROR("snprintf index path");
+               goto error;
+       }
+
+       DBG("Opening index file %s in read only", fullpath);
+       ret = open(fullpath, O_RDONLY);
+       if (ret < 0) {
+               if (errno == ENOENT) {
+                       ret = -ENOENT;  /* callers test for this exact value */
+               } else {
+                       PERROR("opening index in read-only");
+               }
+               goto error;
+       }
+       stream->index_read_fd = ret;
+       DBG("Opening index file %s in read only, (fd: %d)", fullpath, ret);
+
+       ret = lttng_read(stream->index_read_fd, &hdr, sizeof(hdr));
+       if (ret < 0 || (size_t) ret < sizeof(hdr)) {  /* test sign before the unsigned compare */
+               PERROR("Reading index header");
+               ret = -1;  /* a short read is positive: never report it as success */
+               goto error;
+       }
+       if (be32toh(hdr.magic) != CTF_INDEX_MAGIC) {
+               ERR("Invalid header magic");
+               ret = -1;
+               goto error;
+       }
+       if (be32toh(hdr.index_major) != CTF_INDEX_MAJOR ||
+                       be32toh(hdr.index_minor) != CTF_INDEX_MINOR) {
+               ERR("Invalid header version");
+               ret = -1;
+               goto error;
+       }
+       ret = 0;
+
+error:
+       return ret;
+}
+
 /*
  * Allocate and init a new viewer_stream.
  *
@@ -585,7 +677,7 @@ end_no_session:
  * Returns 0 on success or a negative value on error.
  */
 static
-int init_viewer_stream(struct relay_stream *stream)
+int init_viewer_stream(struct relay_stream *stream, int seek_last)
 {
        int ret;
        struct relay_viewer_stream *viewer_stream;
@@ -598,40 +690,159 @@ int init_viewer_stream(struct relay_stream *stream)
                ret = -1;
                goto error;
        }
-
-       viewer_stream->read_fd = -1;
-       viewer_stream->index_read_fd = -1;
        viewer_stream->session_id = stream->session->id;
        viewer_stream->stream_handle = stream->stream_handle;
        viewer_stream->path_name = strndup(stream->path_name,
                        LTTNG_VIEWER_PATH_MAX);
        viewer_stream->channel_name = strndup(stream->channel_name,
                        LTTNG_VIEWER_NAME_MAX);
-       viewer_stream->total_index_received = stream->total_index_received;
-       viewer_stream->tracefile_size = stream->tracefile_size;
        viewer_stream->tracefile_count = stream->tracefile_count;
        viewer_stream->metadata_flag = stream->metadata_flag;
+       viewer_stream->tracefile_count_last = -1ULL;
+       if (seek_last) {
+               viewer_stream->tracefile_count_current =
+                       stream->tracefile_count_current;
+       } else {
+               viewer_stream->tracefile_count_current =
+                       stream->oldest_tracefile_id;
+       }
+
+       viewer_stream->ctf_trace = stream->ctf_trace;
+       if (viewer_stream->metadata_flag) {
+               viewer_stream->ctf_trace->viewer_metadata_stream =
+                       viewer_stream;
+       }
+       uatomic_inc(&viewer_stream->ctf_trace->refcount);
+
+       lttng_ht_node_init_u64(&viewer_stream->stream_n, stream->stream_handle);
+       lttng_ht_add_unique_u64(viewer_streams_ht, &viewer_stream->stream_n);
+
+       viewer_stream->index_read_fd = -1;
+       viewer_stream->read_fd = -1;
 
        /*
         * This is to avoid a race between the initialization of this object and
         * the close of the given stream. If the stream is unable to find this
         * viewer stream when closing, this copy will at least take the latest
         * value.
+        * We also need that for the seek_last.
         */
        viewer_stream->total_index_received = stream->total_index_received;
 
        /*
-        * The deletion of this ctf_trace object is only done in a call RCU of the
-        * relay stream making it valid as long as we have the read side lock.
+        * If we never received an index for the current stream, delay
+        * the opening of the index, otherwise open it right now.
         */
-       viewer_stream->ctf_trace = stream->ctf_trace;
-       uatomic_inc(&viewer_stream->ctf_trace->refcount);
+       if (viewer_stream->tracefile_count_current ==
+                       stream->tracefile_count_current &&
+                       viewer_stream->total_index_received == 0) {
+               viewer_stream->index_read_fd = -1;
+       } else {
+               ret = open_index(viewer_stream);
+               if (ret < 0) {
+                       goto error;
+               }
+       }
 
-       lttng_ht_node_init_u64(&viewer_stream->stream_n, stream->stream_handle);
-       lttng_ht_add_unique_u64(viewer_streams_ht, &viewer_stream->stream_n);
+       if (seek_last && viewer_stream->index_read_fd > 0) {
+               ret = lseek(viewer_stream->index_read_fd,
+                               viewer_stream->total_index_received *
+                                       sizeof(struct ctf_packet_index),
+                               SEEK_CUR);
+               if (ret < 0) {
+                       goto error;
+               }
+               viewer_stream->last_sent_index =
+                       viewer_stream->total_index_received;
+       }
+
+       ret = 0;
+
+error:
+       return ret;
+}
+
+/*
+ * Rotate a stream to the next tracefile.
+ *
+ * Returns 0 on success, 1 on EOF, a negative value on error.
+ */
+static
+int rotate_viewer_stream(struct relay_viewer_stream *viewer_stream,
+               struct relay_stream *stream)
+{
+       int ret;
+       uint64_t tracefile_id;
+
+       assert(viewer_stream);
+
+       tracefile_id = (viewer_stream->tracefile_count_current + 1) %
+               viewer_stream->tracefile_count;
+       /*
+        * Detect the last tracefile to open.
+        */
+       if (viewer_stream->tracefile_count_last != -1ULL &&
+                       viewer_stream->tracefile_count_last ==
+                       viewer_stream->tracefile_count_current) {
+               ret = 1;
+               goto end;
+       }
+
+       if (stream) {
+               pthread_mutex_lock(&stream->viewer_stream_rotation_lock);
+       }
+       /*
+        * The writer and the reader are not working in the same
+        * tracefile, we can read up to EOF, we don't care about the
+        * total_index_received.
+        */
+       if (!stream || (stream->tracefile_count_current != tracefile_id)) {
+               viewer_stream->close_write_flag = 1;
+       } else {
+               /*
+                * We are opening a file that is still open in write, make
+                * sure we limit our reading to the number of indexes
+                * received.
+                */
+               viewer_stream->close_write_flag = 0;
+               if (stream) {
+                       viewer_stream->total_index_received =
+                               stream->total_index_received;
+               }
+       }
+       viewer_stream->tracefile_count_current = tracefile_id;
+
+       ret = close(viewer_stream->index_read_fd);
+       if (ret < 0) {
+               PERROR("close index file %d",
+                               viewer_stream->index_read_fd);
+       }
+       viewer_stream->index_read_fd = -1;
+       ret = close(viewer_stream->read_fd);
+       if (ret < 0) {
+               PERROR("close tracefile %d",
+                               viewer_stream->read_fd);
+       }
+       viewer_stream->read_fd = -1;
+
+       pthread_mutex_lock(&viewer_stream->overwrite_lock);
+       viewer_stream->abort_flag = 0;
+       pthread_mutex_unlock(&viewer_stream->overwrite_lock);
+
+       viewer_stream->index_read_fd = -1;
+       viewer_stream->read_fd = -1;
+
+       if (stream) {
+               pthread_mutex_unlock(&stream->viewer_stream_rotation_lock);
+       }
+       ret = open_index(viewer_stream);
+       if (ret < 0) {
+               goto error;
+       }
 
        ret = 0;
 
+end:
 error:
        return ret;
 }
@@ -643,7 +854,8 @@ static
 int viewer_attach_session(struct relay_command *cmd,
                struct lttng_ht *sessions_ht)
 {
-       int ret, send_streams = 0, nb_streams = 0;
+       int ret, send_streams = 0;
+       uint32_t nb_streams = 0, nb_streams_ready = 0;
        struct lttng_viewer_attach_session_request request;
        struct lttng_viewer_attach_session_response response;
        struct lttng_viewer_stream send_stream;
@@ -653,6 +865,7 @@ int viewer_attach_session(struct relay_command *cmd,
        struct lttng_ht_node_u64 *node64;
        struct lttng_ht_iter iter;
        struct relay_session *session;
+       int seek_last = 0;
 
        assert(cmd);
        assert(sessions_ht);
@@ -665,6 +878,8 @@ int viewer_attach_session(struct relay_command *cmd,
                goto end_no_session;
        }
 
+       health_code_update();
+
        ret = cmd->sock->ops->recvmsg(cmd->sock, &request, sizeof(request), 0);
        if (ret < 0 || ret != sizeof(request)) {
                if (ret == 0) {
@@ -677,6 +892,8 @@ int viewer_attach_session(struct relay_command *cmd,
                goto error;
        }
 
+       health_code_update();
+
        rcu_read_lock();
        lttng_ht_lookup(sessions_ht,
                        (void *)((unsigned long) be64toh(request.session_id)), &iter);
@@ -714,7 +931,7 @@ int viewer_attach_session(struct relay_command *cmd,
                /* Default behaviour. */
                break;
        case VIEWER_SEEK_LAST:
-               /* TODO */
+               seek_last = 1;
                break;
        default:
                ERR("Wrong seek parameter");
@@ -735,6 +952,8 @@ int viewer_attach_session(struct relay_command *cmd,
                cds_lfht_for_each_entry(relay_streams_ht->ht, &iter.iter, node, node) {
                        struct relay_viewer_stream *vstream;
 
+                       health_code_update();
+
                        node = lttng_ht_iter_get_node_ulong(&iter);
                        if (!node) {
                                continue;
@@ -743,39 +962,47 @@ int viewer_attach_session(struct relay_command *cmd,
                        if (stream->session != cmd->session) {
                                continue;
                        }
+                       nb_streams++;
 
                        /*
-                        * Don't send streams with no ctf_trace, they are not ready to be
-                        * read.
+                        * Don't send streams with no ctf_trace, they are not
+                        * ready to be read.
                         */
-                       if (!stream->ctf_trace) {
+                       if (!stream->ctf_trace || !stream->viewer_ready) {
                                continue;
                        }
+                       nb_streams_ready++;
 
                        vstream = live_find_viewer_stream_by_id(stream->stream_handle);
                        if (!vstream) {
-                               ret = init_viewer_stream(stream);
+                               ret = init_viewer_stream(stream, seek_last);
                                if (ret < 0) {
                                        goto end_unlock;
                                }
                        }
-                       nb_streams++;
+               }
+
+               /* We must have the same number of existing streams and ready streams. */
+               if (nb_streams != nb_streams_ready) {
+                       nb_streams = 0;
                }
                response.streams_count = htobe32(nb_streams);
        }
 
 send_reply:
+       health_code_update();
        ret = cmd->sock->ops->sendmsg(cmd->sock, &response, sizeof(response), 0);
        if (ret < 0) {
                ERR("Relay sending viewer attach response");
                goto end_unlock;
        }
+       health_code_update();
 
        /*
-        * Unknown or busy session, just return gracefully, the viewer knows what
+        * Unknown or empty session, just return gracefully, the viewer knows what
         * is happening.
         */
-       if (!send_streams) {
+       if (!send_streams || !nb_streams) {
                ret = 0;
                goto end_unlock;
        }
@@ -783,6 +1010,8 @@ send_reply:
        /* We should only be there if we have a session to attach to. */
        assert(session);
        cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, node, node) {
+               health_code_update();
+
                node64 = lttng_ht_iter_get_node_u64(&iter);
                if (!node64) {
                        continue;
@@ -818,70 +1047,6 @@ error:
        return ret;
 }
 
-/*
- * Open index file using a given viewer stream.
- *
- * Return 0 on success or else a negative value.
- */
-static int open_index(struct relay_viewer_stream *stream)
-{
-       int ret;
-       char fullpath[PATH_MAX];
-       struct lttng_packet_index_file_hdr hdr;
-
-       if (stream->tracefile_size > 0) {
-               /* For now we don't support on-disk ring buffer. */
-               ret = -1;
-               goto end;
-       } else {
-               ret = snprintf(fullpath, sizeof(fullpath), "%s/" DEFAULT_INDEX_DIR
-                               "/%s" DEFAULT_INDEX_FILE_SUFFIX,
-                               stream->path_name, stream->channel_name);
-               if (ret < 0) {
-                       PERROR("snprintf index path");
-                       goto error;
-               }
-       }
-
-       DBG("Opening index file %s in read only", fullpath);
-       ret = open(fullpath, O_RDONLY);
-       if (ret < 0) {
-               if (errno == ENOENT) {
-                       ret = ENOENT;
-                       goto error;
-               } else {
-                       PERROR("opening index in read-only");
-               }
-               goto error;
-       }
-       stream->index_read_fd = ret;
-       DBG("Opening index file %s in read only, (fd: %d)", fullpath, ret);
-
-       do {
-               ret = read(stream->index_read_fd, &hdr, sizeof(hdr));
-       } while (ret < 0 && errno == EINTR);
-       if (ret < 0) {
-               PERROR("Reading index header");
-               goto error;
-       }
-       if (strncmp(hdr.magic, INDEX_MAGIC, sizeof(hdr.magic)) != 0) {
-               ERR("Invalid header magic");
-               ret = -1;
-               goto error;
-       }
-       if (be32toh(hdr.index_major) != INDEX_MAJOR ||
-                       be32toh(hdr.index_minor) != INDEX_MINOR) {
-               ERR("Invalid header version");
-               ret = -1;
-               goto error;
-       }
-       ret = 0;
-
-error:
-end:
-       return ret;
-}
-
 /*
  * Get viewer stream from stream id.
  *
@@ -905,6 +1070,72 @@ end:
        return stream;
 }
 
+static
+void deferred_free_viewer_stream(struct rcu_head *head)
+{
+       struct relay_viewer_stream *stream =
+               caa_container_of(head, struct relay_viewer_stream, rcu_node);
+       /* RCU callback: release the stream's two strings, then the stream itself. */
+       free(stream->path_name);
+       free(stream->channel_name);
+       free(stream);
+}
+
+static
+void delete_viewer_stream(struct relay_viewer_stream *vstream)
+{
+       int delret;
+       struct lttng_ht_iter iter;
+       /* Remove vstream's node from viewer_streams_ht; the removal is asserted to succeed. */
+       iter.iter.node = &vstream->stream_n.node;
+       delret = lttng_ht_del(viewer_streams_ht, &iter);
+       assert(!delret);
+}
+
+static
+void destroy_viewer_stream(struct relay_viewer_stream *vstream)
+{
+       unsigned long ret_ref;
+       int ret;
+       /* Drop the ctf_trace ref, close both fds, free vstream via call_rcu. */
+       assert(vstream);
+       ret_ref = uatomic_add_return(&vstream->ctf_trace->refcount, -1);
+       assert(ret_ref >= 0);  /* NOTE(review): ret_ref is unsigned long, so this is always true */
+
+       if (vstream->read_fd >= 0) {
+               ret = close(vstream->read_fd);
+               if (ret < 0) {
+                       PERROR("close read_fd");
+               }
+       }
+       if (vstream->index_read_fd >= 0) {
+               ret = close(vstream->index_read_fd);
+               if (ret < 0) {
+                       PERROR("close index_read_fd");
+               }
+       }
+
+       /*
+        * If the only stream left in the HT is the metadata stream,
+        * we need to remove it because we won't detect an EOF for this
+        * stream.
+        */
+       if (ret_ref == 1 && vstream->ctf_trace->viewer_metadata_stream) {
+               destroy_viewer_stream(vstream->ctf_trace->viewer_metadata_stream);
+               vstream->ctf_trace->metadata_stream = NULL;  /* NOTE(review): the test above checks viewer_metadata_stream, not this field -- confirm */
+               DBG("Freeing ctf_trace %" PRIu64, vstream->ctf_trace->id);
+               /*
+                * The streaming-side is already closed and we can't receive a new
+                * stream concurrently at this point (since the session is being
+                * destroyed), so when we detect the refcount equals 0, we are the
+                * only owner of the ctf_trace and we can free it ourselves.
+                */
+               free(vstream->ctf_trace);
+       }
+
+       call_rcu(&vstream->rcu_node, deferred_free_viewer_stream);
+}
+
 /*
  * Send the next index for a stream.
  *
@@ -917,7 +1148,7 @@ int viewer_get_next_index(struct relay_command *cmd,
        int ret;
        struct lttng_viewer_get_next_index request_index;
        struct lttng_viewer_index viewer_index;
-       struct lttng_packet_index packet_index;
+       struct ctf_packet_index packet_index;
        struct relay_viewer_stream *vstream;
        struct relay_stream *rstream;
 
@@ -932,6 +1163,7 @@ int viewer_get_next_index(struct relay_command *cmd,
                goto end_no_session;
        }
 
+       health_code_update();
        ret = cmd->sock->ops->recvmsg(cmd->sock, &request_index,
                        sizeof(request_index), 0);
        if (ret < 0 || ret != sizeof(request_index)) {
@@ -939,6 +1171,7 @@ int viewer_get_next_index(struct relay_command *cmd,
                ERR("Relay didn't receive the whole packet");
                goto end;
        }
+       health_code_update();
 
        rcu_read_lock();
        vstream = live_find_viewer_stream_by_id(be64toh(request_index.stream_id));
@@ -960,7 +1193,7 @@ int viewer_get_next_index(struct relay_command *cmd,
        /* First time, we open the index file */
        if (vstream->index_read_fd < 0) {
                ret = open_index(vstream);
-               if (ret == ENOENT) {
+               if (ret == -ENOENT) {
                        /*
                         * The index is created only when the first data packet arrives, it
                         * might not be ready at the beginning of the session
@@ -975,23 +1208,50 @@ int viewer_get_next_index(struct relay_command *cmd,
 
        rstream = relay_stream_find_by_id(vstream->stream_handle);
        if (rstream) {
-               if (rstream->beacon_ts_end != -1ULL &&
-                               vstream->last_sent_index == rstream->total_index_received) {
-                       viewer_index.status = htobe32(VIEWER_INDEX_INACTIVE);
-                       viewer_index.timestamp_end = htobe64(rstream->beacon_ts_end);
-                       goto send_reply;
+               if (vstream->abort_flag) {
+                       /* Rotate on abort (overwrite). */
+                       DBG("Viewer rotate because of overwrite");
+                       ret = rotate_viewer_stream(vstream, rstream);
+                       if (ret < 0) {
+                               goto end_unlock;
+                       } else if (ret == 1) {
+                               viewer_index.status = htobe32(VIEWER_INDEX_HUP);
+                               delete_viewer_stream(vstream);
+                               destroy_viewer_stream(vstream);
+                               goto send_reply;
+                       }
                }
-
-               if (rstream->total_index_received <= vstream->last_sent_index) {
-                       /* No new index to send, retry later. */
-                       viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
-                       goto send_reply;
+               pthread_mutex_lock(&rstream->viewer_stream_rotation_lock);
+               if (rstream->tracefile_count_current == vstream->tracefile_count_current) {
+                       if (rstream->beacon_ts_end != -1ULL &&
+                               vstream->last_sent_index == rstream->total_index_received) {
+                               viewer_index.status = htobe32(VIEWER_INDEX_INACTIVE);
+                               viewer_index.timestamp_end = htobe64(rstream->beacon_ts_end);
+                               pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
+                               goto send_reply;
+                       /*
+                        * Reader and writer are working in the same tracefile, so we care
+                        * about the number of index received and sent. Otherwise, we read
+                        * up to EOF.
+                        */
+                       } else if (rstream->total_index_received <= vstream->last_sent_index
+                                       && !vstream->close_write_flag) {
+                               pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
+                               /* No new index to send, retry later. */
+                               viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
+                               goto send_reply;
+                       }
                }
-       } else if (!rstream &&
+               pthread_mutex_unlock(&rstream->viewer_stream_rotation_lock);
+       } else if (!rstream && vstream->close_write_flag &&
                        vstream->total_index_received == vstream->last_sent_index) {
-               /* Last index sent and stream closed */
+               /* Last index sent and current tracefile closed in write */
                viewer_index.status = htobe32(VIEWER_INDEX_HUP);
+               delete_viewer_stream(vstream);
+               destroy_viewer_stream(vstream);
                goto send_reply;
+       } else {
+               vstream->close_write_flag = 1;
        }
 
        if (!vstream->ctf_trace->metadata_received ||
@@ -1000,13 +1260,51 @@ int viewer_get_next_index(struct relay_command *cmd,
                viewer_index.flags |= LTTNG_VIEWER_FLAG_NEW_METADATA;
        }
 
-       do {
-               ret = read(vstream->index_read_fd, &packet_index,
-                               sizeof(packet_index));
-       } while (ret < 0 && errno == EINTR);
+       pthread_mutex_lock(&vstream->overwrite_lock);
+       if (vstream->abort_flag) {
+               /*
+                * The file is being overwritten by the writer, we cannot
+                * use it.
+                */
+               viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
+               pthread_mutex_unlock(&vstream->overwrite_lock);
+               ret = rotate_viewer_stream(vstream, rstream);
+               if (ret < 0) {
+                       goto end_unlock;
+               } else if (ret == 1) {
+                       viewer_index.status = htobe32(VIEWER_INDEX_HUP);
+                       delete_viewer_stream(vstream);
+                       destroy_viewer_stream(vstream);
+                       goto send_reply;
+               }
+               goto send_reply;
+       }
+       ret = lttng_read(vstream->index_read_fd, &packet_index,
+                       sizeof(packet_index));
+       pthread_mutex_unlock(&vstream->overwrite_lock);
-       if (ret < sizeof(packet_index)) {
-               PERROR("Relay reading index file");
-               viewer_index.status = htobe32(VIEWER_INDEX_ERR);
+       /* Catch a failed (negative) read before the unsigned size compare. */
+       if (ret < 0 || (size_t) ret < sizeof(packet_index)) {
+               /*
+                * The tracefile is closed in write, so we read up to EOF.
+                */
+               if (vstream->close_write_flag == 1) {
+                       viewer_index.status = htobe32(VIEWER_INDEX_RETRY);
+                       /* Rotate on normal EOF */
+                       ret = rotate_viewer_stream(vstream, rstream);
+                       if (ret < 0) {
+                               goto end_unlock;
+                       } else if (ret == 1) {
+                               viewer_index.status = htobe32(VIEWER_INDEX_HUP);
+                               delete_viewer_stream(vstream);
+                               destroy_viewer_stream(vstream);
+                               goto send_reply;
+                       }
+               } else {
+                       PERROR("Relay reading index file %d",
+                                       vstream->index_read_fd);
+                       viewer_index.status = htobe32(VIEWER_INDEX_ERR);
+               }
+               goto send_reply;
        } else {
                viewer_index.status = htobe32(VIEWER_INDEX_OK);
                vstream->last_sent_index++;
@@ -1025,12 +1322,14 @@ int viewer_get_next_index(struct relay_command *cmd,
 
 send_reply:
        viewer_index.flags = htobe32(viewer_index.flags);
+       health_code_update();
        ret = cmd->sock->ops->sendmsg(cmd->sock, &viewer_index,
                        sizeof(viewer_index), 0);
        if (ret < 0) {
                ERR("Relay index to viewer");
                goto end_unlock;
        }
+       health_code_update();
 
-       DBG("Index %" PRIu64 "for stream %" PRIu64 "sent",
+       DBG("Index %" PRIu64 " for stream %" PRIu64 " sent",
                        vstream->last_sent_index, vstream->stream_handle);
@@ -1069,6 +1368,7 @@ int viewer_get_packet(struct relay_command *cmd)
                goto end;
        }
 
+       health_code_update();
        ret = cmd->sock->ops->recvmsg(cmd->sock, &get_packet_info,
                        sizeof(get_packet_info), 0);
        if (ret < 0 || ret != sizeof(get_packet_info)) {
@@ -1076,6 +1376,10 @@ int viewer_get_packet(struct relay_command *cmd)
                ERR("Relay didn't receive the whole packet");
                goto end;
        }
+       health_code_update();
+
+       /* From this point on, the error label can be reached. */
+       memset(&reply, 0, sizeof(reply));
 
        rcu_read_lock();
        stream = live_find_viewer_stream_by_id(be64toh(get_packet_info.stream_id));
@@ -1092,8 +1396,14 @@ int viewer_get_packet(struct relay_command *cmd)
        if (stream->read_fd < 0) {
                char fullpath[PATH_MAX];
 
-               ret = snprintf(fullpath, PATH_MAX, "%s/%s", stream->path_name,
-                               stream->channel_name);
+               if (stream->tracefile_count > 0) {
+                       ret = snprintf(fullpath, PATH_MAX, "%s/%s_%" PRIu64, stream->path_name,
+                                       stream->channel_name,
+                                       stream->tracefile_count_current);
+               } else {
+                       ret = snprintf(fullpath, PATH_MAX, "%s/%s", stream->path_name,
+                                       stream->channel_name);
+               }
                if (ret < 0) {
                        goto error;
                }
@@ -1105,14 +1415,11 @@ int viewer_get_packet(struct relay_command *cmd)
                stream->read_fd = ret;
        }
 
-       memset(&reply, 0, sizeof(reply));
-
        if (!stream->ctf_trace->metadata_received ||
                        stream->ctf_trace->metadata_received >
                        stream->ctf_trace->metadata_sent) {
                reply.status = htobe32(VIEWER_GET_PACKET_ERR);
                reply.flags |= LTTNG_VIEWER_FLAG_NEW_METADATA;
-
                goto send_reply;
        }
 
@@ -1125,14 +1432,33 @@ int viewer_get_packet(struct relay_command *cmd)
 
        ret = lseek(stream->read_fd, be64toh(get_packet_info.offset), SEEK_SET);
        if (ret < 0) {
-               PERROR("lseek");
-               goto error;
+               /*
+                * If the read fd was closed by the streaming side, the
+                * abort_flag will be set to 1, otherwise it is an error.
+                */
+               if (stream->abort_flag == 0) {
+                       PERROR("lseek");
+                       goto error;
+               }
+               reply.status = htobe32(VIEWER_GET_PACKET_EOF);
+               goto send_reply;
        }
-       read_len = read(stream->read_fd, data, len);
-       if (read_len < (ssize_t) len) {
-               PERROR("Relay reading trace file, fd: %d, offset: %" PRIu64,
-                               stream->read_fd, be64toh(get_packet_info.offset));
-               goto error;
+       read_len = lttng_read(stream->read_fd, data, len);
+       /* Catch a failed (negative) read before the unsigned length compare. */
+       if (read_len < 0 || (size_t) read_len < len) {
+               /*
+                * If the read fd was closed by the streaming side, the
+                * abort_flag will be set to 1, otherwise it is an error.
+                */
+               if (stream->abort_flag == 0) {
+                       PERROR("Relay reading trace file, fd: %d, offset: %" PRIu64,
+                                       stream->read_fd,
+                                       be64toh(get_packet_info.offset));
+                       goto error;
+               } else {
+                       reply.status = htobe32(VIEWER_GET_PACKET_EOF);
+                       goto send_reply;
+               }
        }
        reply.status = htobe32(VIEWER_GET_PACKET_OK);
        reply.len = htobe32(len);
@@ -1144,18 +1469,23 @@ error:
 
 send_reply:
        reply.flags = htobe32(reply.flags);
+
+       health_code_update();
        ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0);
        if (ret < 0) {
                ERR("Relay data header to viewer");
                goto end_unlock;
        }
+       health_code_update();
 
        if (send_data) {
+               health_code_update();
                ret = cmd->sock->ops->sendmsg(cmd->sock, data, len, 0);
                if (ret < 0) {
                        ERR("Relay send data to viewer");
                        goto end_unlock;
                }
+               health_code_update();
        }
 
        DBG("Sent %u bytes for stream %" PRIu64, len,
@@ -1195,6 +1525,7 @@ int viewer_get_metadata(struct relay_command *cmd)
                goto end;
        }
 
+       health_code_update();
        ret = cmd->sock->ops->recvmsg(cmd->sock, &request,
                        sizeof(request), 0);
        if (ret < 0 || ret != sizeof(request)) {
@@ -1202,6 +1533,7 @@ int viewer_get_metadata(struct relay_command *cmd)
                ERR("Relay didn't receive the whole packet");
                goto end;
        }
+       health_code_update();
 
        rcu_read_lock();
        stream = live_find_viewer_stream_by_id(be64toh(request.stream_id));
@@ -1244,8 +1576,9 @@ int viewer_get_metadata(struct relay_command *cmd)
                goto error;
        }
 
-       read_len = read(stream->read_fd, data, len);
-       if (read_len < (ssize_t) len) {
+       read_len = lttng_read(stream->read_fd, data, len);
+       /* Catch a failed (negative) read before the unsigned length compare. */
+       if (read_len < 0 || (size_t) read_len < len) {
                PERROR("Relay reading metadata file");
                goto error;
        }
@@ -1257,11 +1589,13 @@ error:
        reply.status = htobe32(VIEWER_METADATA_ERR);
 
 send_reply:
+       health_code_update();
        ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0);
        if (ret < 0) {
                ERR("Relay data header to viewer");
                goto end_unlock;
        }
+       health_code_update();
 
        if (len > 0) {
                ret = cmd->sock->ops->sendmsg(cmd->sock, data, len, 0);
@@ -1375,10 +1709,10 @@ int add_connection(int fd, struct lttng_poll_event *events,
                goto error;
        }
 
-       do {
-               ret = read(fd, relay_connection, sizeof(*relay_connection));
-       } while (ret < 0 && errno == EINTR);
-       if (ret < 0 || ret < sizeof(*relay_connection)) {
+       ret = lttng_read(fd, relay_connection, sizeof(*relay_connection));
+       /* Catch a failed (negative) read before the unsigned size compare. */
+       if (ret < 0 ||
+                       (size_t) ret < sizeof(*relay_connection)) {
                PERROR("read relay cmd pipe");
                goto error_read;
        }
@@ -1413,64 +1745,38 @@ void deferred_free_connection(struct rcu_head *head)
        free(relay_connection);
 }
 
-static
-void deferred_free_viewer_stream(struct rcu_head *head)
-{
-       struct relay_viewer_stream *stream =
-               caa_container_of(head, struct relay_viewer_stream, rcu_node);
-
-       if (stream->ctf_trace) {
-               uatomic_dec(&stream->ctf_trace->refcount);
-               assert(uatomic_read(&stream->ctf_trace->refcount) >= 0);
-               if (uatomic_read(&stream->ctf_trace->refcount) == 0) {
-                       DBG("Freeing ctf_trace %" PRIu64, stream->ctf_trace->id);
-                       free(stream->ctf_trace);
-               }
-       }
-
-       free(stream->path_name);
-       free(stream->channel_name);
-       free(stream);
-}
-
+/*
+ * Delete all streams for a specific session ID.
+ */
 static
 void viewer_del_streams(uint64_t session_id)
 {
-       int ret;
        struct relay_viewer_stream *stream;
-       struct lttng_ht_node_u64 *node;
        struct lttng_ht_iter iter;
 
        rcu_read_lock();
-       cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, node, node) {
-               node = lttng_ht_iter_get_node_u64(&iter);
-               if (!node) {
-                       continue;
-               }
+       cds_lfht_for_each_entry(viewer_streams_ht->ht, &iter.iter, stream,
+                       stream_n.node) {
+               health_code_update();
 
-               stream = caa_container_of(node, struct relay_viewer_stream, stream_n);
                if (stream->session_id != session_id) {
                        continue;
                }
 
-               if (stream->read_fd > 0) {
-                       ret = close(stream->read_fd);
-                       if (ret < 0) {
-                               PERROR("close read_fd");
-                       }
-               }
-               if (stream->index_read_fd > 0) {
-                       ret = close(stream->index_read_fd);
-                       if (ret < 0) {
-                               PERROR("close index_read_fd");
-                       }
-               }
-               if (stream->metadata_flag && stream->ctf_trace) {
+               delete_viewer_stream(stream);
+               assert(stream->ctf_trace);
+
+               if (stream->metadata_flag) {
+                       /*
+                        * The metadata viewer stream is destroyed once the refcount on the
+                        * ctf trace goes to 0 in the destroy stream function thus there is
+                        * no explicit call to that function here.
+                        */
                        stream->ctf_trace->metadata_sent = 0;
+                       stream->ctf_trace->viewer_metadata_stream = NULL;
+               } else {
+                       destroy_viewer_stream(stream);
                }
-               ret = lttng_ht_del(viewer_streams_ht, &iter);
-               assert(!ret);
-               call_rcu(&stream->rcu_node, deferred_free_viewer_stream);
        }
        rcu_read_unlock();
 }
@@ -1490,6 +1796,9 @@ void del_connection(struct lttng_ht *relay_connections_ht,
        assert(iter);
        assert(relay_connection);
 
+       DBG("Cleaning connection of session ID %" PRIu64,
+                       relay_connection->session_id);
+
        ret = lttng_ht_del(relay_connections_ht, iter);
        assert(!ret);
 
@@ -1519,6 +1828,8 @@ void *thread_worker(void *data)
 
        rcu_register_thread();
 
+       health_register(health_relayd, HEALTH_RELAYD_TYPE_LIVE_WORKER);
+
        /* table of connections indexed on socket */
        relay_connections_ht = lttng_ht_new(0, LTTNG_HT_TYPE_ULONG);
        if (!relay_connections_ht) {
@@ -1539,9 +1850,13 @@ restart:
        while (1) {
                int i;
 
+               health_code_update();
+
                /* Infinite blocking call, waiting for transmission */
                DBG3("Relayd live viewer worker thread polling...");
+               health_poll_entry();
                ret = lttng_poll_wait(&events, -1);
+               health_poll_exit();
                if (ret < 0) {
                        /*
                         * Restart interrupted system call.
@@ -1564,6 +1879,8 @@ restart:
                        uint32_t revents = LTTNG_POLL_GETEV(&events, i);
                        int pollfd = LTTNG_POLL_GETFD(&events, i);
 
+                       health_code_update();
+
                        /* Thread quit pipe has been closed. Killing thread. */
                        ret = check_thread_quit_pipe(pollfd, revents);
                        if (ret) {
@@ -1614,7 +1931,7 @@ restart:
                                        /* connection closed */
                                        if (ret <= 0) {
                                                cleanup_poll_connection(&events, pollfd);
-                                               del_connection( relay_connections_ht, &iter,
+                                               del_connection(relay_connections_ht, &iter,
                                                                relay_connection);
                                                DBG("Viewer control connection closed with %d",
                                                                pollfd);
@@ -1647,6 +1964,8 @@ error:
        /* empty the hash table and free the memory */
        rcu_read_lock();
        cds_lfht_for_each_entry(relay_connections_ht->ht, &iter.iter, node, node) {
+               health_code_update();
+
                node = lttng_ht_iter_get_node_ulong(&iter);
                if (!node) {
                        continue;
@@ -1666,6 +1985,11 @@ relay_connections_ht_error:
                DBG("Viewer worker thread exited with error");
        }
        DBG("Viewer worker thread cleanup complete");
+       if (err) {
+               health_error();
+               ERR("Health error occurred in %s", __func__);
+       }
+       health_unregister(health_relayd);
        stop_threads();
        rcu_unregister_thread();
        return NULL;
@@ -1684,7 +2008,7 @@ static int create_relay_cmd_pipe(void)
        return ret;
 }
 
-void live_stop_threads()
+void live_stop_threads(void)
 {
        int ret;
        void *status;
@@ -1719,7 +2043,7 @@ error:
  * main
  */
 int live_start_threads(struct lttng_uri *uri,
-               struct relay_local_data *relay_ctx)
+               struct relay_local_data *relay_ctx, int quit_pipe[2])
 {
        int ret = 0;
        void *status;
@@ -1728,10 +2052,8 @@ int live_start_threads(struct lttng_uri *uri,
        assert(uri);
        live_uri = uri;
 
-       /* Create thread quit pipe */
-       if ((ret = init_thread_quit_pipe()) < 0) {
-               goto error;
-       }
+       live_thread_quit_pipe[0] = quit_pipe[0];
+       live_thread_quit_pipe[1] = quit_pipe[1];
 
        /* Check if daemon is UID = 0 */
        is_root = !getuid();
This page took 0.037937 seconds and 4 git commands to generate.