Fix: consumer should wait for initial streams
[lttng-tools.git] / src / common / kernel-consumer / kernel-consumer.c
index f5eb35c31e513e458a69c8d607c26764b916d577..5a219fc0b6c543d5c0f5270c3febd4ccfb4adb3a 100644 (file)
@@ -25,6 +25,7 @@
 #include <sys/mman.h>
 #include <sys/socket.h>
 #include <sys/types.h>
+#include <inttypes.h>
 #include <unistd.h>
 #include <sys/stat.h>
 
@@ -91,7 +92,7 @@ int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
 
        ret = lttcomm_recv_unix_sock(sock, &msg, sizeof(msg));
        if (ret != sizeof(msg)) {
-               lttng_consumer_send_error(ctx, CONSUMERD_ERROR_RECV_CMD);
+               lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_CMD);
                return ret;
        }
        if (msg.cmd_type == LTTNG_CONSUMER_STOP) {
@@ -104,82 +105,9 @@ int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
        switch (msg.cmd_type) {
        case LTTNG_CONSUMER_ADD_RELAYD_SOCKET:
        {
-               int fd;
-               struct consumer_relayd_sock_pair *relayd;
-
-               DBG("Consumer adding relayd socket");
-
-               /* Get relayd reference if exists. */
-               relayd = consumer_find_relayd(msg.u.relayd_sock.net_index);
-               if (relayd == NULL) {
-                       /* Not found. Allocate one. */
-                       relayd = consumer_allocate_relayd_sock_pair(
-                                       msg.u.relayd_sock.net_index);
-                       if (relayd == NULL) {
-                               lttng_consumer_send_error(ctx, CONSUMERD_OUTFD_ERROR);
-                               goto end_nosignal;
-                       }
-               }
-
-               /* Poll on consumer socket. */
-               if (lttng_consumer_poll_socket(consumer_sockpoll) < 0) {
-                       rcu_read_unlock();
-                       return -EINTR;
-               }
-
-               /* Get relayd socket from session daemon */
-               ret = lttcomm_recv_fds_unix_sock(sock, &fd, 1);
-               if (ret != sizeof(fd)) {
-                       lttng_consumer_send_error(ctx, CONSUMERD_ERROR_RECV_FD);
-                       goto end_nosignal;
-               }
-
-               /* Copy socket information and received FD */
-               switch (msg.u.relayd_sock.type) {
-               case LTTNG_STREAM_CONTROL:
-                       /* Copy received lttcomm socket */
-                       lttcomm_copy_sock(&relayd->control_sock, &msg.u.relayd_sock.sock);
-
-                       ret = lttcomm_create_sock(&relayd->control_sock);
-                       if (ret < 0) {
-                               goto end_nosignal;
-                       }
-
-                       /* Close the created socket fd which is useless */
-                       close(relayd->control_sock.fd);
-
-                       /* Assign new file descriptor */
-                       relayd->control_sock.fd = fd;
-                       break;
-               case LTTNG_STREAM_DATA:
-                       /* Copy received lttcomm socket */
-                       lttcomm_copy_sock(&relayd->data_sock, &msg.u.relayd_sock.sock);
-                       ret = lttcomm_create_sock(&relayd->data_sock);
-                       if (ret < 0) {
-                               goto end_nosignal;
-                       }
-
-                       /* Close the created socket fd which is useless */
-                       close(relayd->data_sock.fd);
-
-                       /* Assign new file descriptor */
-                       relayd->data_sock.fd = fd;
-                       break;
-               default:
-                       ERR("Unknown relayd socket type");
-                       goto end_nosignal;
-               }
-
-               DBG("Consumer %s socket created successfully with net idx %d (fd: %d)",
-                               msg.u.relayd_sock.type == LTTNG_STREAM_CONTROL ? "control" : "data",
-                               relayd->net_seq_idx, fd);
-
-               /*
-                * Add relayd socket pair to consumer data hashtable. If object already
-                * exists or on error, the function gracefully returns.
-                */
-               consumer_add_relayd(relayd);
-
+               ret = consumer_add_relayd_socket(msg.u.relayd_sock.net_index,
+                               msg.u.relayd_sock.type, ctx, sock, consumer_sockpoll,
+                               &msg.u.relayd_sock.sock);
                goto end_nosignal;
        }
        case LTTNG_CONSUMER_ADD_CHANNEL:
@@ -190,9 +118,10 @@ int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
                new_channel = consumer_allocate_channel(msg.u.channel.channel_key,
                                -1, -1,
                                msg.u.channel.mmap_len,
-                               msg.u.channel.max_sb_size);
+                               msg.u.channel.max_sb_size,
+                               msg.u.channel.nb_init_streams);
                if (new_channel == NULL) {
-                       lttng_consumer_send_error(ctx, CONSUMERD_OUTFD_ERROR);
+                       lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
                        goto end_nosignal;
                }
                if (ctx->on_recv_channel != NULL) {
@@ -222,7 +151,7 @@ int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
                /* Get stream file descriptor from socket */
                ret = lttcomm_recv_fds_unix_sock(sock, &fd, 1);
                if (ret != sizeof(fd)) {
-                       lttng_consumer_send_error(ctx, CONSUMERD_ERROR_RECV_FD);
+                       lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_ERROR_RECV_FD);
                        rcu_read_unlock();
                        return ret;
                }
@@ -239,10 +168,19 @@ int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
                                msg.u.stream.net_index,
                                msg.u.stream.metadata_flag);
                if (new_stream == NULL) {
-                       lttng_consumer_send_error(ctx, CONSUMERD_OUTFD_ERROR);
+                       lttng_consumer_send_error(ctx, LTTCOMM_CONSUMERD_OUTFD_ERROR);
                        goto end_nosignal;
                }
 
+               /*
+                * The buffer flush is done on the session daemon side for the kernel
+                * so no need for the stream "hangup_flush_done" variable to be
+                * tracked. This is important for a kernel stream since we don't rely
+                * on the flush state of the stream to read data. It's not the case for
+                * user space tracing.
+                */
+               new_stream->hangup_flush_done = 0;
+
                /* The stream is not metadata. Get relayd reference if exists. */
                relayd = consumer_find_relayd(msg.u.stream.net_index);
                if (relayd != NULL) {
@@ -262,14 +200,29 @@ int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
                        goto end_nosignal;
                }
 
-               if (ctx->on_recv_stream != NULL) {
-                       ret = ctx->on_recv_stream(new_stream);
-                       if (ret == 0) {
-                               consumer_add_stream(new_stream);
-                       } else if (ret < 0) {
-                               goto end_nosignal;
+               /* Send stream to the metadata thread */
+               if (new_stream->metadata_flag) {
+                       if (ctx->on_recv_stream) {
+                               ret = ctx->on_recv_stream(new_stream);
+                               if (ret < 0) {
+                                       goto end_nosignal;
+                               }
+                       }
+
+                       do {
+                               ret = write(ctx->consumer_metadata_pipe[1], new_stream,
+                                               sizeof(struct lttng_consumer_stream));
+                       } while (ret < 0 && errno == EINTR);
+                       if (ret < 0) {
+                               PERROR("write metadata pipe");
                        }
                } else {
+                       if (ctx->on_recv_stream) {
+                               ret = ctx->on_recv_stream(new_stream);
+                               if (ret < 0) {
+                                       goto end_nosignal;
+                               }
+                       }
                        consumer_add_stream(new_stream);
                }
 
@@ -283,25 +236,30 @@ int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
        }
        case LTTNG_CONSUMER_DESTROY_RELAYD:
        {
+               uint64_t index = msg.u.destroy_relayd.net_seq_idx;
                struct consumer_relayd_sock_pair *relayd;
 
-               DBG("Kernel consumer destroying relayd %zu",
-                               msg.u.destroy_relayd.net_seq_idx);
+               DBG("Kernel consumer destroying relayd %" PRIu64, index);
 
                /* Get relayd reference if exists. */
-               relayd = consumer_find_relayd(msg.u.destroy_relayd.net_seq_idx);
+               relayd = consumer_find_relayd(index);
                if (relayd == NULL) {
-                       ERR("Unable to find relayd %zu", msg.u.destroy_relayd.net_seq_idx);
+                       ERR("Unable to find relayd %" PRIu64, index);
                        goto end_nosignal;
                }
 
-               /* Set destroy flag for this object */
-               uatomic_set(&relayd->destroy_flag, 1);
+               /*
+                * Each relayd socket pair has a refcount of streams attached to it
+                * which tells if the relayd is still active or not depending on the
+                * refcount value.
+                *
+                * This will set the destroy flag of the relayd object and destroy it
+                * if the refcount reaches zero when called.
+                *
+                * The destroy can happen either here or when a stream fd hangs up.
+                */
+               consumer_flag_relayd_for_destroy(relayd);
 
-               /* Destroy the relayd if refcount is 0 else set the destroy flag. */
-               if (uatomic_read(&relayd->refcount) == 0) {
-                       consumer_destroy_relayd(relayd);
-               }
                goto end_nosignal;
        }
        default:
@@ -324,7 +282,12 @@ int lttng_kconsumer_recv_cmd(struct lttng_consumer_local_data *ctx,
        } while (ret < 0 && errno == EINTR);
 end_nosignal:
        rcu_read_unlock();
-       return 0;
+
+       /*
+        * Return 1 to indicate success since the 0 value can be a socket
+        * shutdown during the recv() or send() call.
+        */
+       return 1;
 }
 
 /*
@@ -333,7 +296,7 @@ end_nosignal:
 ssize_t lttng_kconsumer_read_subbuffer(struct lttng_consumer_stream *stream,
                struct lttng_consumer_local_data *ctx)
 {
-       unsigned long len;
+       unsigned long len, subbuf_size, padding;
        int err;
        ssize_t ret = 0;
        int infd = stream->wait_fd;
@@ -342,6 +305,7 @@ ssize_t lttng_kconsumer_read_subbuffer(struct lttng_consumer_stream *stream,
        /* Get the next subbuffer */
        err = kernctl_get_next_subbuf(infd);
        if (err != 0) {
+               ret = err;
                /*
                 * This is a debug message even for single-threaded consumer,
                 * because poll() has more relaxed criteria than get subbuf,
@@ -353,60 +317,92 @@ ssize_t lttng_kconsumer_read_subbuffer(struct lttng_consumer_stream *stream,
                goto end;
        }
 
+       /* Get the full subbuffer size including padding */
+       err = kernctl_get_padded_subbuf_size(infd, &len);
+       if (err != 0) {
+               errno = -err;
+               perror("Getting sub-buffer len failed.");
+               ret = err;
+               goto end;
+       }
+
        switch (stream->output) {
-               case LTTNG_EVENT_SPLICE:
-                       /* read the whole subbuffer */
-                       err = kernctl_get_padded_subbuf_size(infd, &len);
-                       if (err != 0) {
-                               errno = -ret;
-                               perror("Getting sub-buffer len failed.");
-                               goto end;
-                       }
+       case LTTNG_EVENT_SPLICE:
 
-                       /* splice the subbuffer to the tracefile */
-                       ret = lttng_consumer_on_read_subbuffer_splice(ctx, stream, len);
-                       if (ret != len) {
-                               /*
-                                * display the error but continue processing to try
-                                * to release the subbuffer
-                                */
-                               ERR("Error splicing to tracefile (ret: %ld != len: %ld)",
-                                               ret, len);
-                       }
+               /*
+                * XXX: The lttng-modules splice "actor" does not handle copying
+                * partial pages hence only using the subbuffer size without the
+                * padding makes the splice fail.
+                */
+               subbuf_size = len;
+               padding = 0;
 
-                       break;
-               case LTTNG_EVENT_MMAP:
-                       /* read the used subbuffer size */
-                       err = kernctl_get_padded_subbuf_size(infd, &len);
-                       if (err != 0) {
-                               errno = -ret;
-                               perror("Getting sub-buffer len failed.");
-                               goto end;
-                       }
-                       /* write the subbuffer to the tracefile */
-                       ret = lttng_consumer_on_read_subbuffer_mmap(ctx, stream, len);
-                       if (ret != len) {
-                               /*
-                                * display the error but continue processing to try
-                                * to release the subbuffer
-                                */
-                               ERR("Error writing to tracefile");
-                       }
-                       break;
-               default:
-                       ERR("Unknown output method");
-                       ret = -1;
+               /* splice the subbuffer to the tracefile */
+               ret = lttng_consumer_on_read_subbuffer_splice(ctx, stream, subbuf_size,
+                               padding);
+               /*
+                * XXX: Splice does not support network streaming so the return value
+                * is simply checked against subbuf_size and not like the mmap() op.
+                */
+               if (ret != subbuf_size) {
+                       /*
+                        * display the error but continue processing to try
+                        * to release the subbuffer
+                        */
+                       ERR("Error splicing to tracefile (ret: %zd != len: %lu)",
+                                       ret, subbuf_size);
+               }
+               break;
+       case LTTNG_EVENT_MMAP:
+               /* Get subbuffer size without padding */
+               err = kernctl_get_subbuf_size(infd, &subbuf_size);
+               if (err != 0) {
+                       errno = -err;
+                       perror("Getting sub-buffer len failed.");
+                       ret = err;
+                       goto end;
+               }
+
+               /* Make sure the tracer has not gone mad on us! */
+               assert(len >= subbuf_size);
+
+               padding = len - subbuf_size;
+
+               /* write the subbuffer to the tracefile */
+               ret = lttng_consumer_on_read_subbuffer_mmap(ctx, stream, subbuf_size,
+                               padding);
+               /*
+                * The mmap operation should write subbuf_size amount of data when
+                * network streaming or the full padding (len) size when we are _not_
+                * streaming.
+                */
+               if ((ret != subbuf_size && stream->net_seq_idx != -1) ||
+                               (ret != len && stream->net_seq_idx == -1)) {
+                       /*
+                        * Display the error but continue processing to try to release the
+                        * subbuffer
+                        */
+                       ERR("Error writing to tracefile "
+                                       "(ret: %zd != len: %lu != subbuf_size: %lu)",
+                                       ret, len, subbuf_size);
+               }
+               break;
+       default:
+               ERR("Unknown output method");
+               ret = -1;
        }
 
        err = kernctl_put_next_subbuf(infd);
        if (err != 0) {
-               errno = -ret;
+               errno = -err;
                if (errno == EFAULT) {
                        perror("Error in unreserving sub buffer\n");
                } else if (errno == EIO) {
                        /* Should never happen with newer LTTng versions */
                        perror("Reader has been pushed by the writer, last sub-buffer corrupted.");
                }
+
+               ret = -err;
                goto end;
        }
 
This page took 0.027547 seconds and 4 git commands to generate.