Fix: handle new streams in live mode in relayd
[lttng-tools.git] / src / bin / lttng-relayd / main.c
index f943488be5b94309efd332196bd52e876d2eea1a..730eaf203102886b206aa5d267afc2f8bc9e98b6 100644 (file)
@@ -1,6 +1,7 @@
 /*
  * Copyright (C) 2012 - Julien Desfossez <jdesfossez@efficios.com>
  *                      David Goulet <dgoulet@efficios.com>
+ *               2013 - Jérémie Galarneau <jeremie.galarneau@efficios.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, version 2 only,
 #include <common/compat/poll.h>
 #include <common/compat/socket.h>
 #include <common/defaults.h>
+#include <common/daemonize.h>
 #include <common/futex.h>
 #include <common/sessiond-comm/sessiond-comm.h>
 #include <common/sessiond-comm/inet.h>
 #include <common/sessiond-comm/relayd.h>
 #include <common/uri.h>
 #include <common/utils.h>
+#include <common/config/config.h>
 
 #include "cmd.h"
 #include "ctf-trace.h"
 #include "lttng-relayd.h"
 #include "live.h"
 #include "health-relayd.h"
+#include "testpoint.h"
 
 /* command line options */
 char *opt_output_path;
-static int opt_daemon;
+static int opt_daemon, opt_background;
+
+/*
+ * We need to wait for listener and live listener threads, as well as
+ * health check thread, before being ready to signal readiness.
+ */
+#define NR_LTTNG_RELAY_READY   3
+static int lttng_relay_ready = NR_LTTNG_RELAY_READY;
+static int recv_child_signal;  /* Set to 1 when a SIGUSR1 signal is received. */
+static pid_t child_ppid;       /* Internal parent PID use with daemonize. */
+
 static struct lttng_uri *control_uri;
 static struct lttng_uri *data_uri;
 static struct lttng_uri *live_uri;
@@ -69,12 +83,15 @@ static struct lttng_uri *live_uri;
 const char *progname;
 
 const char *tracing_group_name = DEFAULT_TRACING_GROUP;
+static int tracing_group_name_override;
+
+const char * const config_section_name = "relayd";
 
 /*
  * Quit pipe for all threads. This permits a single cancellation point
  * for all threads when receiving an event on the pipe.
  */
-static int thread_quit_pipe[2] = { -1, -1 };
+int thread_quit_pipe[2] = { -1, -1 };
 
 /*
  * This pipe is used to inform the worker thread that a command is queued and
@@ -121,6 +138,22 @@ struct lttng_ht *indexes_ht;
 /* Relayd health monitoring */
 struct health_app *health_relayd;
 
+static struct option long_options[] = {
+       { "control-port", 1, 0, 'C', },
+       { "data-port", 1, 0, 'D', },
+       { "live-port", 1, 0, 'L', },
+       { "daemonize", 0, 0, 'd', },
+       { "background", 0, 0, 'b', },
+       { "group", 1, 0, 'g', },
+       { "help", 0, 0, 'h', },
+       { "output", 1, 0, 'o', },
+       { "verbose", 0, 0, 'v', },
+       { "config", 1, 0, 'f' },
+       { NULL, 0, 0, 0, },
+};
+
+static const char *config_ignore_options[] = { "help", "config" };
+
 /*
  * usage function on stderr
  */
@@ -130,90 +163,216 @@ void usage(void)
        fprintf(stderr, "Usage: %s OPTIONS\n\nOptions:\n", progname);
        fprintf(stderr, "  -h, --help                Display this usage.\n");
        fprintf(stderr, "  -d, --daemonize           Start as a daemon.\n");
+       fprintf(stderr, "  -b, --background          Start as a daemon, keeping console open.\n");
        fprintf(stderr, "  -C, --control-port URL    Control port listening.\n");
        fprintf(stderr, "  -D, --data-port URL       Data port listening.\n");
+       fprintf(stderr, "  -L, --live-port URL       Live view port listening.\n");
        fprintf(stderr, "  -o, --output PATH         Output path for traces. Must use an absolute path.\n");
        fprintf(stderr, "  -v, --verbose             Verbose mode. Activate DBG() macro.\n");
        fprintf(stderr, "  -g, --group NAME          Specify the tracing group name. (default: tracing)\n");
+       fprintf(stderr, "  -f  --config              Load daemon configuration file\n");
 }
 
+/*
+ * Take an option from the getopt output and set it in the right variable to be
+ * used later.
+ *
+ * Return 0 on success else a negative value.
+ */
 static
-int parse_args(int argc, char **argv)
+int set_option(int opt, const char *arg, const char *optname)
 {
-       int c;
-       int ret = 0;
-       char *default_address;
-
-       static struct option long_options[] = {
-               { "control-port", 1, 0, 'C', },
-               { "data-port", 1, 0, 'D', },
-               { "daemonize", 0, 0, 'd', },
-               { "group", 1, 0, 'g', },
-               { "help", 0, 0, 'h', },
-               { "output", 1, 0, 'o', },
-               { "verbose", 0, 0, 'v', },
-               { NULL, 0, 0, 0, },
-       };
+       int ret;
 
+       switch (opt) {
+       case 0:
+               fprintf(stderr, "option %s", optname);
+               if (arg) {
+                       fprintf(stderr, " with arg %s\n", arg);
+               }
+               break;
+       case 'C':
+               ret = uri_parse(arg, &control_uri);
+               if (ret < 0) {
+                       ERR("Invalid control URI specified");
+                       goto end;
+               }
+               if (control_uri->port == 0) {
+                       control_uri->port = DEFAULT_NETWORK_CONTROL_PORT;
+               }
+               break;
+       case 'D':
+               ret = uri_parse(arg, &data_uri);
+               if (ret < 0) {
+                       ERR("Invalid data URI specified");
+                       goto end;
+               }
+               if (data_uri->port == 0) {
+                       data_uri->port = DEFAULT_NETWORK_DATA_PORT;
+               }
+               break;
+       case 'L':
+               ret = uri_parse(arg, &live_uri);
+               if (ret < 0) {
+                       ERR("Invalid live URI specified");
+                       goto end;
+               }
+               if (live_uri->port == 0) {
+                       live_uri->port = DEFAULT_NETWORK_VIEWER_PORT;
+               }
+               break;
+       case 'd':
+               opt_daemon = 1;
+               break;
+       case 'b':
+               opt_background = 1;
+               break;
+       case 'g':
+               tracing_group_name = strdup(arg);
+               tracing_group_name_override = 1;
+               break;
+       case 'h':
+               usage();
+               exit(EXIT_FAILURE);
+       case 'o':
+               ret = asprintf(&opt_output_path, "%s", arg);
+               if (ret < 0) {
+                       ret = -errno;
+                       PERROR("asprintf opt_output_path");
+                       goto end;
+               }
+               break;
+       case 'v':
+               /* Verbose level can increase using multiple -v */
+               if (arg) {
+                       lttng_opt_verbose = config_parse_value(arg);
+               } else {
+                       lttng_opt_verbose += 1;
+               }
+               break;
+       default:
+               /* Unknown option or other error.
+                * Error is printed by getopt, just return */
+               ret = -1;
+               goto end;
+       }
+
+       /* All good. */
+       ret = 0;
+
+end:
+       return ret;
+}
+
+/*
+ * config_entry_handler_cb used to handle options read from a config file.
+ * See config_entry_handler_cb comment in common/config/config.h for the
+ * return value conventions.
+ */
+static
+int config_entry_handler(const struct config_entry *entry, void *unused)
+{
+       int ret = 0, i;
+
+       if (!entry || !entry->name || !entry->value) {
+               ret = -EINVAL;
+               goto end;
+       }
+
+       /* Check if the option is to be ignored */
+       for (i = 0; i < sizeof(config_ignore_options) / sizeof(char *); i++) {
+               if (!strcmp(entry->name, config_ignore_options[i])) {
+                       goto end;
+               }
+       }
+
+       for (i = 0; i < (sizeof(long_options) / sizeof(struct option)) - 1; i++) {
+               /* Ignore if entry name is not fully matched. */
+               if (strcmp(entry->name, long_options[i].name)) {
+                       continue;
+               }
+
+               /*
+                * If the option takes no argument on the command line, we have to
+                * check if the value is "true". We support non-zero numeric values,
+                * true, on and yes.
+                */
+               if (!long_options[i].has_arg) {
+                       ret = config_parse_value(entry->value);
+                       if (ret <= 0) {
+                               if (ret) {
+                                       WARN("Invalid configuration value \"%s\" for option %s",
+                                                       entry->value, entry->name);
+                               }
+                               /* False, skip boolean config option. */
+                               goto end;
+                       }
+               }
+
+               ret = set_option(long_options[i].val, entry->value, entry->name);
+               goto end;
+       }
+
+       WARN("Unrecognized option \"%s\" in daemon configuration file.",
+                       entry->name);
+
+end:
+       return ret;
+}
+
+static
+int set_options(int argc, char **argv)
+{
+       int c, ret = 0, option_index = 0;
+       int orig_optopt = optopt, orig_optind = optind;
+       char *default_address, *optstring;
+       const char *config_path = NULL;
+
+       optstring = utils_generate_optstring(long_options,
+                       sizeof(long_options) / sizeof(struct option));
+       if (!optstring) {
+               ret = -ENOMEM;
+               goto exit;
+       }
+
+       /* Check for the --config option */
+
+       while ((c = getopt_long(argc, argv, optstring, long_options,
+                                       &option_index)) != -1) {
+               if (c == '?') {
+                       ret = -EINVAL;
+                       goto exit;
+               } else if (c != 'f') {
+                       continue;
+               }
+
+               config_path = utils_expand_path(optarg);
+               if (!config_path) {
+                       ERR("Failed to resolve path: %s", optarg);
+               }
+       }
+
+       ret = config_get_section_entries(config_path, config_section_name,
+                       config_entry_handler, NULL);
+       if (ret) {
+               if (ret > 0) {
+                       ERR("Invalid configuration option at line %i", ret);
+                       ret = -1;
+               }
+               goto exit;
+       }
+
+       /* Reset getopt's global state */
+       optopt = orig_optopt;
+       optind = orig_optind;
        while (1) {
-               int option_index = 0;
-               c = getopt_long(argc, argv, "dhv" "C:D:o:g:",
-                               long_options, &option_index);
+               c = getopt_long(argc, argv, optstring, long_options, &option_index);
                if (c == -1) {
                        break;
                }
 
-               switch (c) {
-               case 0:
-                       fprintf(stderr, "option %s", long_options[option_index].name);
-                       if (optarg) {
-                               fprintf(stderr, " with arg %s\n", optarg);
-                       }
-                       break;
-               case 'C':
-                       ret = uri_parse(optarg, &control_uri);
-                       if (ret < 0) {
-                               ERR("Invalid control URI specified");
-                               goto exit;
-                       }
-                       if (control_uri->port == 0) {
-                               control_uri->port = DEFAULT_NETWORK_CONTROL_PORT;
-                       }
-                       break;
-               case 'D':
-                       ret = uri_parse(optarg, &data_uri);
-                       if (ret < 0) {
-                               ERR("Invalid data URI specified");
-                               goto exit;
-                       }
-                       if (data_uri->port == 0) {
-                               data_uri->port = DEFAULT_NETWORK_DATA_PORT;
-                       }
-                       break;
-               case 'd':
-                       opt_daemon = 1;
-                       break;
-               case 'g':
-                       tracing_group_name = optarg;
-                       break;
-               case 'h':
-                       usage();
-                       exit(EXIT_FAILURE);
-               case 'o':
-                       ret = asprintf(&opt_output_path, "%s", optarg);
-                       if (ret < 0) {
-                               PERROR("asprintf opt_output_path");
-                               goto exit;
-                       }
-                       break;
-               case 'v':
-                       /* Verbose level can increase using multiple -v */
-                       lttng_opt_verbose += 1;
-                       break;
-               default:
-                       /* Unknown option or other error.
-                        * Error is printed by getopt, just return */
-                       ret = -1;
+               ret = set_option(c, optarg, long_options[option_index].name);
+               if (ret < 0) {
                        goto exit;
                }
        }
@@ -266,6 +425,7 @@ int parse_args(int argc, char **argv)
        }
 
 exit:
+       free(optstring);
        return ret;
 }
 
@@ -285,6 +445,11 @@ void cleanup(void)
 
        uri_free(control_uri);
        uri_free(data_uri);
+       /* Live URI is freed in the live thread. */
+
+       if (tracing_group_name_override) {
+               free((void *) tracing_group_name);
+       }
 }
 
 /*
@@ -293,12 +458,10 @@ void cleanup(void)
 static
 int notify_thread_pipe(int wpipe)
 {
-       int ret;
+       ssize_t ret;
 
-       do {
-               ret = write(wpipe, "!", 1);
-       } while (ret < 0 && errno == EINTR);
-       if (ret < 0 || ret != 1) {
+       ret = lttng_write(wpipe, "!", 1);
+       if (ret < 1) {
                PERROR("write poll pipe");
        }
 
@@ -307,12 +470,10 @@ int notify_thread_pipe(int wpipe)
 
 static void notify_health_quit_pipe(int *pipe)
 {
-       int ret;
+       ssize_t ret;
 
-       do {
-               ret = write(pipe[1], "4", 1);
-       } while (ret < 0 && errno == EINTR);
-       if (ret < 0 || ret != 1) {
+       ret = lttng_write(pipe[1], "4", 1);
+       if (ret < 1) {
                PERROR("write relay health quit");
        }
 }
@@ -360,6 +521,9 @@ void sighandler(int sig)
                DBG("SIGTERM caught");
                stop_threads();
                break;
+       case SIGUSR1:
+               CMM_STORE_SHARED(recv_child_signal, 1);
+               break;
        default:
                break;
        }
@@ -399,11 +563,26 @@ int set_signal_handler(void)
                return ret;
        }
 
-       DBG("Signal handler set for SIGTERM, SIGPIPE and SIGINT");
+       if ((ret = sigaction(SIGUSR1, &sa, NULL)) < 0) {
+               PERROR("sigaction");
+               return ret;
+       }
+
+       DBG("Signal handler set for SIGTERM, SIGUSR1, SIGPIPE and SIGINT");
 
        return ret;
 }
 
+void lttng_relay_notify_ready(void)
+{
+       /* Notify the parent of the fork() process that we are ready. */
+       if (opt_daemon || opt_background) {
+               if (uatomic_sub_return(&lttng_relay_ready, 1) == 0) {
+                       kill(child_ppid, SIGUSR1);
+               }
+       }
+}
+
 /*
  * Init thread quit pipe.
  *
@@ -438,7 +617,7 @@ int create_thread_poll_set(struct lttng_poll_event *events, int size)
        }
 
        /* Add quit pipe */
-       ret = lttng_poll_add(events, thread_quit_pipe[0], LPOLLIN);
+       ret = lttng_poll_add(events, thread_quit_pipe[0], LPOLLIN | LPOLLERR);
        if (ret < 0) {
                goto error;
        }
@@ -573,6 +752,12 @@ void *relay_thread_listener(void *data)
                goto error_poll_add;
        }
 
+       lttng_relay_notify_ready();
+
+       if (testpoint(relayd_thread_listener)) {
+               goto error_testpoint;
+       }
+
        while (1) {
                health_code_update();
 
@@ -673,6 +858,7 @@ restart:
 exit:
 error:
 error_poll_add:
+error_testpoint:
        lttng_poll_clean(&events);
 error_create_poll:
        if (data_sock->fd >= 0) {
@@ -707,7 +893,8 @@ error_sock_control:
 static
 void *relay_thread_dispatcher(void *data)
 {
-       int ret, err = -1;
+       int err = -1;
+       ssize_t ret;
        struct cds_wfq_node *node;
        struct relay_command *relay_cmd = NULL;
 
@@ -715,6 +902,10 @@ void *relay_thread_dispatcher(void *data)
 
        health_register(health_relayd, HEALTH_RELAYD_TYPE_DISPATCHER);
 
+       if (testpoint(relayd_thread_dispatcher)) {
+               goto error_testpoint;
+       }
+
        health_code_update();
 
        while (!CMM_LOAD_SHARED(dispatch_thread_exit)) {
@@ -742,12 +933,10 @@ void *relay_thread_dispatcher(void *data)
                         * call is blocking so we can be assured that the data will be read
                         * at some point in time or wait to the end of the world :)
                         */
-                       do {
-                               ret = write(relay_cmd_pipe[1], relay_cmd,
-                                               sizeof(struct relay_command));
-                       } while (ret < 0 && errno == EINTR);
+                       ret = lttng_write(relay_cmd_pipe[1], relay_cmd,
+                                       sizeof(struct relay_command));
                        free(relay_cmd);
-                       if (ret < 0 || ret != sizeof(struct relay_command)) {
+                       if (ret < sizeof(struct relay_command)) {
                                PERROR("write cmd pipe");
                                goto error;
                        }
@@ -763,6 +952,7 @@ void *relay_thread_dispatcher(void *data)
        err = 0;
 
 error:
+error_testpoint:
        if (err) {
                health_error();
                ERR("Health error occurred in %s", __func__);
@@ -805,8 +995,6 @@ void deferred_free_stream(struct rcu_head *head)
        struct relay_stream *stream =
                caa_container_of(head, struct relay_stream, rcu_node);
 
-       ctf_trace_try_destroy(stream->ctf_trace);
-
        free(stream->path_name);
        free(stream->channel_name);
        free(stream);
@@ -854,7 +1042,11 @@ static void destroy_stream(struct relay_stream *stream)
                 * lookup failure on the live thread side of a stream indicates
                 * that the viewer stream index received value should be used.
                 */
+               pthread_mutex_lock(&stream->viewer_stream_rotation_lock);
                vstream->total_index_received = stream->total_index_received;
+               vstream->tracefile_count_last = stream->tracefile_count_current;
+               vstream->close_write_flag = 1;
+               pthread_mutex_unlock(&stream->viewer_stream_rotation_lock);
        }
 
        /* Cleanup index of that stream. */
@@ -866,6 +1058,11 @@ static void destroy_stream(struct relay_stream *stream)
        iter.iter.node = &stream->ctf_trace_node.node;
        delret = lttng_ht_del(stream->ctf_traces_ht, &iter);
        assert(!delret);
+
+       if (stream->ctf_trace) {
+               ctf_trace_try_destroy(stream->ctf_trace);
+       }
+
        call_rcu(&stream->rcu_node, deferred_free_stream);
        DBG("Closed tracefile %d from close stream", stream->fd);
 }
@@ -898,6 +1095,8 @@ void relay_delete_session(struct relay_command *cmd,
                stream = caa_container_of(node, struct relay_stream, stream_n);
                if (stream->session == cmd->session) {
                        destroy_stream(stream);
+                       cmd->session->stream_count--;
+                       assert(cmd->session->stream_count >= 0);
                }
        }
 
@@ -961,11 +1160,16 @@ int relay_create_session(struct lttcomm_relayd_hdr *recv_hdr,
        session->sock = cmd->sock;
        session->minor = cmd->minor;
        session->major = cmd->major;
+       pthread_mutex_init(&session->viewer_ready_lock, NULL);
        cmd->session = session;
 
        reply.session_id = htobe64(session->id);
 
        switch (cmd->minor) {
+               case 1:
+               case 2:
+               case 3:
+                       break;
                case 4: /* LTTng sessiond 2.4 */
                default:
                        ret = cmd_create_session_2_4(cmd, session);
@@ -995,6 +1199,64 @@ error:
        return ret;
 }
 
+/*
+ * When we have received all the streams and the metadata for a channel,
+ * we make them visible to the viewer threads.
+ */
+static
+void set_viewer_ready_flag(struct relay_command *cmd)
+{
+       struct relay_stream_recv_handle *node, *tmp_node;
+
+       pthread_mutex_lock(&cmd->session->viewer_ready_lock);
+
+       cds_list_for_each_entry_safe(node, tmp_node, &cmd->recv_head, node) {
+               struct relay_stream *stream;
+
+               rcu_read_lock();
+               stream = relay_stream_find_by_id(node->id);
+               if (!stream) {
+                       /*
+                        * Stream is most probably being cleaned up by the data thread thus
+                        * simply continue to the next one.
+                        */
+                       rcu_read_unlock();
+                       continue;
+               }
+
+               stream->viewer_ready = 1;
+               rcu_read_unlock();
+
+               /* Clean stream handle node. */
+               cds_list_del(&node->node);
+               free(node);
+       }
+
+       pthread_mutex_unlock(&cmd->session->viewer_ready_lock);
+       return;
+}
+
+/*
+ * Add a recv handle node to the connection recv list with the given stream
+ * handle. A new node is allocated thus must be freed when the node is deleted
+ * from the list.
+ */
+static void queue_stream_handle(uint64_t handle, struct relay_command *cmd)
+{
+       struct relay_stream_recv_handle *node;
+
+       assert(cmd);
+
+       node = zmalloc(sizeof(*node));
+       if (!node) {
+               PERROR("zmalloc queue stream handle");
+               return;
+       }
+
+       node->id = handle;
+       cds_list_add(&node->node, &cmd->recv_head);
+}
+
 /*
  * relay_add_stream: allocate a new stream for a session
  */
@@ -1087,6 +1349,13 @@ int relay_add_stream(struct lttcomm_relayd_hdr *recv_hdr,
        ctf_trace_assign(cmd->ctf_traces_ht, stream);
        stream->ctf_traces_ht = cmd->ctf_traces_ht;
 
+       /*
+        * Add the stream handle in the recv list of the connection. Once the end
+        * stream message is received, this list is emptied and streams are set
+        * with the viewer ready flag.
+        */
+       queue_stream_handle(stream->stream_handle, cmd);
+
        lttng_ht_node_init_ulong(&stream->stream_n,
                        (unsigned long) stream->stream_handle);
        lttng_ht_add_unique_ulong(relay_streams_ht,
@@ -1094,6 +1363,7 @@ int relay_add_stream(struct lttcomm_relayd_hdr *recv_hdr,
 
        lttng_ht_node_init_str(&stream->ctf_trace_node, stream->path_name);
        lttng_ht_add_str(cmd->ctf_traces_ht, &stream->ctf_trace_node);
+       session->stream_count++;
 
        DBG("Relay new stream added %s with ID %" PRIu64, stream->channel_name,
                        stream->stream_handle);
@@ -1170,6 +1440,8 @@ int relay_close_stream(struct lttcomm_relayd_hdr *recv_hdr,
 
        stream->last_net_seq_num = be64toh(stream_info.last_net_seq_num);
        stream->close_flag = 1;
+       session->stream_count--;
+       assert(session->stream_count >= 0);
 
        if (close_stream_check(stream)) {
                destroy_stream(stream);
@@ -1243,7 +1515,7 @@ int relay_start(struct lttcomm_relayd_hdr *recv_hdr,
  */
 static int write_padding_to_file(int fd, uint32_t size)
 {
-       int ret = 0;
+       ssize_t ret = 0;
        char *zeros;
 
        if (size == 0) {
@@ -1257,10 +1529,8 @@ static int write_padding_to_file(int fd, uint32_t size)
                goto end;
        }
 
-       do {
-               ret = write(fd, zeros, size);
-       } while (ret < 0 && errno == EINTR);
-       if (ret < 0 || ret != size) {
+       ret = lttng_write(fd, zeros, size);
+       if (ret < size) {
                PERROR("write padding to file");
        }
 
@@ -1278,6 +1548,7 @@ int relay_recv_metadata(struct lttcomm_relayd_hdr *recv_hdr,
                struct relay_command *cmd)
 {
        int ret = htobe32(LTTNG_OK);
+       ssize_t size_ret;
        struct relay_session *session = cmd->session;
        struct lttcomm_relayd_metadata_payload *metadata_struct;
        struct relay_stream *metadata_stream;
@@ -1334,11 +1605,9 @@ int relay_recv_metadata(struct lttcomm_relayd_hdr *recv_hdr,
                goto end_unlock;
        }
 
-       do {
-               ret = write(metadata_stream->fd, metadata_struct->payload,
-                               payload_size);
-       } while (ret < 0 && errno == EINTR);
-       if (ret < 0 || ret != payload_size) {
+       size_ret = lttng_write(metadata_stream->fd, metadata_struct->payload,
+                       payload_size);
+       if (size_ret < payload_size) {
                ERR("Relay error writing metadata on file");
                ret = -1;
                goto end_unlock;
@@ -1835,6 +2104,53 @@ end_no_session:
        return ret;
 }
 
+/*
+ * Receive the streams_sent message.
+ *
+ * Return 0 on success else a negative value.
+ */
+static
+int relay_streams_sent(struct lttcomm_relayd_hdr *recv_hdr,
+               struct relay_command *cmd)
+{
+       int ret, send_ret;
+       struct lttcomm_relayd_generic_reply reply;
+
+       assert(cmd);
+
+       DBG("Relay receiving streams_sent");
+
+       if (!cmd->session || cmd->version_check_done == 0) {
+               ERR("Trying to close a stream before version check");
+               ret = -1;
+               goto end_no_session;
+       }
+
+       /*
+        * Flag every pending stream in the connection recv list that they are
+        * ready to be used by the viewer.
+        */
+       set_viewer_ready_flag(cmd);
+
+       /*
+        * Inform the viewer that there are new streams in the session.
+        */
+       uatomic_set(&cmd->session->new_streams, 1);
+
+       reply.ret_code = htobe32(LTTNG_OK);
+       send_ret = cmd->sock->ops->sendmsg(cmd->sock, &reply, sizeof(reply), 0);
+       if (send_ret < 0) {
+               ERR("Relay sending sent_stream reply");
+               ret = send_ret;
+       } else {
+               /* Success. */
+               ret = 0;
+       }
+
+end_no_session:
+       return ret;
+}
+
 /*
  * Process the commands received on the control socket
  */
@@ -1878,6 +2194,9 @@ int relay_process_control(struct lttcomm_relayd_hdr *recv_hdr,
        case RELAYD_SEND_INDEX:
                ret = relay_recv_index(recv_hdr, cmd);
                break;
+       case RELAYD_STREAMS_SENT:
+               ret = relay_streams_sent(recv_hdr, cmd);
+               break;
        case RELAYD_UPDATE_SYNC_INFO:
        default:
                ERR("Received unknown command (%u)", be32toh(recv_hdr->cmd));
@@ -1979,6 +2298,7 @@ static
 int relay_process_data(struct relay_command *cmd)
 {
        int ret = 0, rotate_index = 0;
+       ssize_t size_ret;
        struct relay_stream *stream;
        struct lttcomm_relayd_data_hdr data_hdr;
        uint64_t stream_id;
@@ -2041,10 +2361,54 @@ int relay_process_data(struct relay_command *cmd)
        if (stream->tracefile_size > 0 &&
                        (stream->tracefile_size_current + data_size) >
                        stream->tracefile_size) {
+               struct relay_viewer_stream *vstream;
+               uint64_t new_id;
+
+               new_id = (stream->tracefile_count_current + 1) %
+                       stream->tracefile_count;
+               /*
+                * When we wrap-around back to 0, we start overwriting old
+                * trace data.
+                */
+               if (!stream->tracefile_overwrite && new_id == 0) {
+                       stream->tracefile_overwrite = 1;
+               }
+               pthread_mutex_lock(&stream->viewer_stream_rotation_lock);
+               if (stream->tracefile_overwrite) {
+                       stream->oldest_tracefile_id =
+                               (stream->oldest_tracefile_id + 1) %
+                               stream->tracefile_count;
+               }
+               vstream = live_find_viewer_stream_by_id(stream->stream_handle);
+               if (vstream) {
+                       /*
+                        * The viewer is reading a file about to be
+                        * overwritten. Close the FDs it is
+                        * currently using and let it handle the fault.
+                        */
+                       if (vstream->tracefile_count_current == new_id) {
+                               pthread_mutex_lock(&vstream->overwrite_lock);
+                               vstream->abort_flag = 1;
+                               pthread_mutex_unlock(&vstream->overwrite_lock);
+                               DBG("Streaming side setting abort_flag on stream %s_%lu\n",
+                                               stream->channel_name, new_id);
+                       } else if (vstream->tracefile_count_current ==
+                                       stream->tracefile_count_current) {
+                               /*
+                                * The reader and writer were in the
+                                * same trace file, inform the viewer
+                                * that no new index will ever be added
+                                * to this file.
+                                */
+                               vstream->close_write_flag = 1;
+                       }
+               }
                ret = utils_rotate_stream_file(stream->path_name, stream->channel_name,
                                stream->tracefile_size, stream->tracefile_count,
                                relayd_uid, relayd_gid, stream->fd,
                                &(stream->tracefile_count_current), &stream->fd);
+               stream->total_index_received = 0;
+               pthread_mutex_unlock(&stream->viewer_stream_rotation_lock);
                if (ret < 0) {
                        ERR("Rotating stream output file");
                        goto end_rcu_unlock;
@@ -2066,10 +2430,8 @@ int relay_process_data(struct relay_command *cmd)
        }
 
        /* Write data to stream output fd. */
-       do {
-               ret = write(stream->fd, data_buffer, data_size);
-       } while (ret < 0 && errno == EINTR);
-       if (ret < 0 || ret != data_size) {
+       size_ret = lttng_write(stream->fd, data_buffer, data_size);
+       if (size_ret < data_size) {
                ERR("Relay error writing data to file");
                ret = -1;
                goto end_rcu_unlock;
@@ -2115,20 +2477,19 @@ int relay_add_connection(int fd, struct lttng_poll_event *events,
                struct lttng_ht *relay_connections_ht)
 {
        struct relay_command *relay_connection;
-       int ret;
+       ssize_t ret;
 
        relay_connection = zmalloc(sizeof(struct relay_command));
        if (relay_connection == NULL) {
                PERROR("Relay command zmalloc");
                goto error;
        }
-       do {
-               ret = read(fd, relay_connection, sizeof(struct relay_command));
-       } while (ret < 0 && errno == EINTR);
-       if (ret < 0 || ret < sizeof(struct relay_command)) {
+       ret = lttng_read(fd, relay_connection, sizeof(struct relay_command));
+       if (ret < sizeof(struct relay_command)) {
                PERROR("read relay cmd pipe");
                goto error_read;
        }
+       CDS_INIT_LIST_HEAD(&relay_connection->recv_head);
 
        /*
         * Only used by the control side and the reference is copied inside each
@@ -2180,8 +2541,17 @@ void relay_del_connection(struct lttng_ht *relay_connections_ht,
        assert(!ret);
 
        if (relay_connection->type == RELAY_CONTROL) {
+               struct relay_stream_recv_handle *node, *tmp_node;
+
                relay_delete_session(relay_connection, sessions_ht);
                lttng_ht_destroy(relay_connection->ctf_traces_ht);
+
+               /* Clean up recv list. */
+               cds_list_for_each_entry_safe(node, tmp_node,
+                               &relay_connection->recv_head, node) {
+                       cds_list_del(&node->node);
+                       free(node);
+               }
        }
 
        call_rcu(&relay_connection->rcu_node, deferred_free_connection);
@@ -2210,6 +2580,10 @@ void *relay_thread_worker(void *data)
 
        health_register(health_relayd, HEALTH_RELAYD_TYPE_WORKER);
 
+       if (testpoint(relayd_thread_worker)) {
+               goto error_testpoint;
+       }
+
        health_code_update();
 
        /* table of connections indexed on socket */
@@ -2471,6 +2845,7 @@ relay_connections_ht_error:
        }
        DBG("Worker thread cleanup complete");
        free(data_buffer);
+error_testpoint:
        if (err) {
                health_error();
                ERR("Health error occurred in %s", __func__);
@@ -2503,14 +2878,9 @@ int main(int argc, char **argv)
        void *status;
        struct relay_local_data *relay_ctx;
 
-       /* Create thread quit pipe */
-       if ((ret = init_thread_quit_pipe()) < 0) {
-               goto error;
-       }
-
        /* Parse arguments */
        progname = argv[0];
-       if ((ret = parse_args(argc, argv)) < 0) {
+       if ((ret = set_options(argc, argv)) < 0) {
                goto exit;
        }
 
@@ -2533,12 +2903,28 @@ int main(int argc, char **argv)
        }
 
        /* Daemonize */
-       if (opt_daemon) {
-               ret = daemon(0, 0);
+       if (opt_daemon || opt_background) {
+               int i;
+
+               ret = lttng_daemonize(&child_ppid, &recv_child_signal,
+                       !opt_background);
                if (ret < 0) {
-                       PERROR("daemon");
                        goto exit;
                }
+
+               /*
+                * We are in the child. Make sure all other file
+                * descriptors are closed, in case we are called with
+                * more opened file descriptors than the standard ones.
+                */
+               for (i = 3; i < sysconf(_SC_OPEN_MAX); i++) {
+                       (void) close(i);
+               }
+       }
+
+       /* Create thread quit pipe */
+       if ((ret = init_thread_quit_pipe()) < 0) {
+               goto error;
        }
 
        /* We need those values for the file/dir creation. */
@@ -2547,7 +2933,7 @@ int main(int argc, char **argv)
 
        /* Check if daemon is UID = 0 */
        if (relayd_uid == 0) {
-               if (control_uri->port < 1024 || data_uri->port < 1024) {
+               if (control_uri->port < 1024 || data_uri->port < 1024 || live_uri->port < 1024) {
                        ERR("Need to be root to use ports < 1024");
                        ret = -1;
                        goto exit;
@@ -2567,6 +2953,7 @@ int main(int argc, char **argv)
 
        /* Initialize communication library */
        lttcomm_init();
+       lttcomm_inet_init();
 
        relay_ctx = zmalloc(sizeof(struct relay_local_data));
        if (!relay_ctx) {
@@ -2636,7 +3023,7 @@ int main(int argc, char **argv)
                goto exit_listener;
        }
 
-       ret = live_start_threads(live_uri, relay_ctx, thread_quit_pipe);
+       ret = live_start_threads(live_uri, relay_ctx);
        if (ret != 0) {
                ERR("Starting live viewer threads");
                goto exit_live;
This page took 0.034616 seconds and 4 git commands to generate.