Consume with mmap if enabled
[lttng-tools.git] / kconsumerd / kconsumerd.c
index f0ccb8c9e797a049141a1ba0fc131f70fe6d3197..c816d0dece14e623ebdf3d68b60d68b3ae443466 100644 (file)
@@ -35,6 +35,7 @@
 #include <urcu/list.h>
 #include <poll.h>
 #include <unistd.h>
+#include <sys/mman.h>
 
 #include "lttngerr.h"
 #include "libkernelctl.h"
@@ -67,6 +68,12 @@ static int poll_pipe[2];
 /* socket to communicate errors with sessiond */
 static int error_socket = -1;
 
+/* counts the number of times the user pressed ctrl+c */
+static int sigintcount = 0;
+
+/* flag telling the polling thread to quit once all fds have hung up */
+static int quit = 0;
+
 /* Argument variables */
 int opt_quiet;
 int opt_verbose;
@@ -156,7 +163,7 @@ static int add_fd(struct lttcomm_kconsumerd_msg *buf, int consumerd_fd)
        /* Opening the tracefile in write mode */
        DBG("Opening %s for writing", tmp_fd->path_name);
        ret = open(tmp_fd->path_name,
-                       O_WRONLY|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO);
+                       O_WRONLY|O_CREAT|O_TRUNC, S_IRWXU|S_IRWXG|S_IRWXO);
        if (ret < 0) {
                ERR("Opening %s", tmp_fd->path_name);
                perror("open");
@@ -185,6 +192,11 @@ end:
  */
 static void sighandler(int sig)
 {
+       if (sig == SIGINT && sigintcount++ == 0) {
+               DBG("ignoring first SIGINT");
+               return;
+       }
+
        cleanup();
 
        return;
@@ -228,6 +240,119 @@ static int set_signal_handler(void)
        return ret;
 }
 
+/*
+ * write_tracefile_chunk
+ *
+ * Write exactly count bytes from buf to the tracefile (out_fd) of
+ * kconsumerd_fd, retrying after short writes and EINTR. For every range
+ * written, asynchronous writeout is started and out_fd_offset is advanced.
+ * Returns 0 on success, -1 with errno set on error.
+ */
+static int write_tracefile_chunk(struct ltt_kconsumerd_fd *kconsumerd_fd,
+               const char *buf, unsigned long count)
+{
+       int outfd = kconsumerd_fd->out_fd;
+       ssize_t written;
+
+       while (count > 0) {
+               written = write(outfd, buf, count);
+               if (written < 0) {
+                       if (errno == EINTR) {
+                               continue;
+                       }
+                       return -1;
+               }
+               if (written == 0) {
+                       /* no progress is possible, do not spin forever */
+                       errno = EIO;
+                       return -1;
+               }
+               /* This won't block, but will start writeout asynchronously */
+               sync_file_range(outfd, kconsumerd_fd->out_fd_offset, written,
+                               SYNC_FILE_RANGE_WRITE);
+               kconsumerd_fd->out_fd_offset += written;
+               buf += written;
+               count -= written;
+       }
+
+       return 0;
+}
+
+/*
+ * on_read_subbuffer_mmap
+ *
+ * mmap the ring buffer, read it and write the data to the tracefile,
+ * followed by the zero padding needed to reach the padded subbuffer size.
+ *
+ * kconsumerd_fd: stream to consume; consumerd_fd is mmap'ed, out_fd is
+ *                written to and out_fd_offset is advanced by every byte
+ *                written (data and padding).
+ * len:           number of bytes of data to copy from the subbuffer.
+ *
+ * Returns the number of bytes written (data + padding) on success, or a
+ * negative errno value on error, so that a caller testing "ret < 0"
+ * actually sees the failure.
+ */
+static int on_read_subbuffer_mmap(struct ltt_kconsumerd_fd *kconsumerd_fd,
+               unsigned long len)
+{
+       unsigned long mmap_len;
+       unsigned long mmap_offset;
+       unsigned long padded_len;
+       unsigned long padding_len;
+       void *mmap_base = MAP_FAILED;
+       char *padding = NULL;
+       int ret = 0;
+       off_t orig_offset = kconsumerd_fd->out_fd_offset;
+       int fd = kconsumerd_fd->consumerd_fd;
+       int outfd = kconsumerd_fd->out_fd;
+
+       /* get the padded subbuffer size to know the padding required */
+       if (kernctl_get_padded_subbuf_size(fd, &padded_len) != 0) {
+               ret = -errno;
+               perror("kernctl_get_padded_subbuf_size");
+               goto end;
+       }
+       if (padded_len < len) {
+               /* the unsigned subtraction below would wrap around */
+               ERR("Padded subbuffer size smaller than the data to write");
+               ret = -EINVAL;
+               goto end;
+       }
+       padding_len = padded_len - len;
+       if (padding_len > 0) {
+               /* calloc hands back the zero-filled padding directly */
+               padding = calloc(padding_len, sizeof(char));
+               if (padding == NULL) {
+                       ret = -ENOMEM;
+                       perror("calloc padding");
+                       goto end;
+               }
+       }
+
+       /* get the len of the mmap region */
+       if (kernctl_get_mmap_len(fd, &mmap_len) != 0) {
+               ret = -errno;
+               perror("kernctl_get_mmap_len");
+               goto end;
+       }
+
+       /* get the offset inside the fd to mmap */
+       if (kernctl_get_mmap_read_offset(fd, &mmap_offset) != 0) {
+               ret = -errno;
+               perror("kernctl_get_mmap_read_offset");
+               goto end;
+       }
+
+       mmap_base = mmap(NULL, mmap_len, PROT_READ, MAP_PRIVATE, fd, mmap_offset);
+       if (mmap_base == MAP_FAILED) {
+               ret = -errno;
+               perror("Error mmaping");
+               goto end;
+       }
+
+       /* copy the subbuffer data, coping with short writes */
+       if (write_tracefile_chunk(kconsumerd_fd, mmap_base, len) < 0) {
+               ret = -errno;
+               perror("Error in file write");
+               goto end;
+       }
+
+       /* once all the data is written, write the padding to disk */
+       if (write_tracefile_chunk(kconsumerd_fd, padding, padding_len) < 0) {
+               ret = -errno;
+               perror("Error writing padding to file");
+               goto end;
+       }
+       ret = (int) padded_len;
+
+       /*
+        * This does a blocking write-and-wait on any page that belongs to the
+        * subbuffer prior to the one we just wrote.
+        * Don't care about error values, as these are just hints and ways to
+        * limit the amount of page cache used.
+        */
+       if (orig_offset >= kconsumerd_fd->max_sb_size) {
+               sync_file_range(outfd, orig_offset - kconsumerd_fd->max_sb_size,
+                               kconsumerd_fd->max_sb_size,
+                               SYNC_FILE_RANGE_WAIT_BEFORE
+                               | SYNC_FILE_RANGE_WRITE
+                               | SYNC_FILE_RANGE_WAIT_AFTER);
+               /*
+                * Give hints to the kernel about how we access the file:
+                * POSIX_FADV_DONTNEED : we won't re-access data in a near
+                * future after we write it.
+                * We need to call fadvise again after the file grows because
+                * the kernel does not seem to apply fadvise to non-existing
+                * parts of the file.
+                * Call fadvise _after_ having waited for the page writeback to
+                * complete because the dirty page writeback semantic is not
+                * well defined. So it can be expected to lead to lower
+                * throughput in streaming.
+                */
+               posix_fadvise(outfd, orig_offset - kconsumerd_fd->max_sb_size,
+                               kconsumerd_fd->max_sb_size, POSIX_FADV_DONTNEED);
+       }
+
+end:
+       /* release the mapping on every path, it is redone for each subbuffer */
+       if (mmap_base != MAP_FAILED && munmap(mmap_base, mmap_len) != 0) {
+               perror("munmap");
+       }
+       free(padding);
+       return ret;
+}
+
 /*
  * on_read_subbuffer
  *
@@ -304,18 +429,18 @@ static int on_read_subbuffer(struct ltt_kconsumerd_fd *kconsumerd_fd,
 splice_error:
        /* send the appropriate error description to sessiond */
        switch(ret) {
-               case EBADF:
-                       send_error(KCONSUMERD_SPLICE_EBADF);
-                       break;
-               case EINVAL:
-                       send_error(KCONSUMERD_SPLICE_EINVAL);
-                       break;
-               case ENOMEM:
-                       send_error(KCONSUMERD_SPLICE_ENOMEM);
-                       break;
-               case ESPIPE:
-                       send_error(KCONSUMERD_SPLICE_ESPIPE);
-                       break;
+       case EBADF:
+               send_error(KCONSUMERD_SPLICE_EBADF);
+               break;
+       case EINVAL:
+               send_error(KCONSUMERD_SPLICE_EINVAL);
+               break;
+       case ENOMEM:
+               send_error(KCONSUMERD_SPLICE_ENOMEM);
+               break;
+       case ESPIPE:
+               send_error(KCONSUMERD_SPLICE_ESPIPE);
+               break;
        }
 
 end:
@@ -344,22 +469,46 @@ static int read_subbuffer(struct ltt_kconsumerd_fd *kconsumerd_fd)
                goto end;
        }
 
-       /* read the whole subbuffer */
-       err = kernctl_get_padded_subbuf_size(infd, &len);
-       if (err != 0) {
-               ret = errno;
-               perror("Getting sub-buffer len failed.");
-               goto end;
-       }
+       if (DEFAULT_CHANNEL_OUTPUT == LTTNG_KERNEL_SPLICE) {
+               /* read the whole subbuffer */
+               err = kernctl_get_padded_subbuf_size(infd, &len);
+               if (err != 0) {
+                       ret = errno;
+                       perror("Getting sub-buffer len failed.");
+                       goto end;
+               }
 
-       /* splice the subbuffer to the tracefile */
-       ret = on_read_subbuffer(kconsumerd_fd, len);
-       if (ret < 0) {
-               /*
-                * display the error but continue processing to try
-                * to release the subbuffer
-                */
-               ERR("Error splicing to tracefile");
+               /* splice the subbuffer to the tracefile */
+               ret = on_read_subbuffer(kconsumerd_fd, len);
+               if (ret < 0) {
+                       /*
+                        * display the error but continue processing to try
+                        * to release the subbuffer
+                        */
+                       ERR("Error splicing to tracefile");
+               }
+       } else if (DEFAULT_CHANNEL_OUTPUT == LTTNG_KERNEL_MMAP) {
+               /* read the used subbuffer size */
+               err = kernctl_get_subbuf_size(infd, &len);
+               if (err != 0) {
+                       ret = errno;
+                       perror("Getting sub-buffer len failed.");
+                       goto end;
+               }
+
+               /* write the subbuffer to the tracefile */
+               ret = on_read_subbuffer_mmap(kconsumerd_fd, len);
+               if (ret < 0) {
+                       /*
+                        * display the error but continue processing to try
+                        * to release the subbuffer
+                        */
+                       ERR("Error writing to tracefile");
+               }
+       } else {
+               ERR("Unknown output method");
+               ret = -1;
+               goto end;
        }
 
        err = kernctl_put_next_subbuf(infd);
@@ -384,7 +533,7 @@ end:
  * Update a fd according to what we just received
  */
 static void change_fd_state(int sessiond_fd,
-               enum lttcomm_kconsumerd_fd_state state)
+               enum kconsumerd_fd_state state)
 {
        struct ltt_kconsumerd_fd *iter;
        cds_list_for_each_entry(iter, &kconsumerd_fd_list.head, list) {
@@ -403,85 +552,82 @@ static void change_fd_state(int sessiond_fd,
  * Returns the size of received data
  */
 static int consumerd_recv_fd(int sfd, int size,
-               enum lttcomm_consumerd_command cmd_type)
+               enum kconsumerd_command cmd_type)
 {
        struct msghdr msg;
        struct iovec iov[1];
-       int ret, i, tmp2;
+       int ret = 0, i, tmp2;
        struct cmsghdr *cmsg;
        int nb_fd;
-       char tmp[CMSG_SPACE(size)];
-       struct lttcomm_kconsumerd_msg *buf;
+       char recv_fd[CMSG_SPACE(sizeof(int))];
+       struct lttcomm_kconsumerd_msg lkm;
+
        /* the number of fds we are about to receive */
-       nb_fd = size/sizeof(struct lttcomm_kconsumerd_msg);
+       nb_fd = size / sizeof(struct lttcomm_kconsumerd_msg);
 
-       buf = malloc(size);
+       for (i = 0; i < nb_fd; i++) {
+               memset(&msg, 0, sizeof(msg));
 
-       memset(&msg, 0, sizeof(msg));
+               /* Prepare to receive the structures */
+               iov[0].iov_base = &lkm;
+               iov[0].iov_len = sizeof(lkm);
+               msg.msg_iov = iov;
+               msg.msg_iovlen = 1;
 
-       /* Prepare to receive the structures */
-       iov[0].iov_base = buf;
-       iov[0].iov_len = size;
-       msg.msg_iov = iov;
-       msg.msg_iovlen = 1;
+               msg.msg_control = recv_fd;
+               msg.msg_controllen = sizeof(recv_fd);
 
-       msg.msg_control = tmp;
-       msg.msg_controllen = sizeof(tmp);
+               DBG("Waiting to receive fd");
+               if ((ret = recvmsg(sfd, &msg, 0)) < 0) {
+                       perror("recvmsg");
+                       continue;
+               }
 
-       DBG("Waiting to receive fds");
-       if ((ret = recvmsg(sfd, &msg, 0)) < 0) {
-               perror("recvmsg");
-       }
-       if (ret != size) {
-               ERR("Received only %d, expected %d", ret, size);
-               send_error(KCONSUMERD_ERROR_RECV_FD);
-               goto end;
-       }
+               if (ret != (size / nb_fd)) {
+                       ERR("Received only %d, expected %d", ret, size);
+                       send_error(KCONSUMERD_ERROR_RECV_FD);
+                       goto end;
+               }
 
-       cmsg = CMSG_FIRSTHDR(&msg);
-       if (!cmsg) {
-               ERR("Invalid control message header");
-               ret = -1;
-               send_error(KCONSUMERD_ERROR_RECV_FD);
-               goto end;
-       }
+               cmsg = CMSG_FIRSTHDR(&msg);
+               if (!cmsg) {
+                       ERR("Invalid control message header");
+                       ret = -1;
+                       send_error(KCONSUMERD_ERROR_RECV_FD);
+                       goto end;
+               }
 
-       /* if we received fds */
-       if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
-               DBG("Receive : expecting %d fds", nb_fd);
-               for (i = 0; i < nb_fd; i++) {
+               /* if we received fds */
+               if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
                        switch (cmd_type) {
-                               case LTTCOMM_ADD_STREAM:
-                                       DBG("add_fd %s (%d)", buf[i].path_name, ((int *)CMSG_DATA(cmsg))[i]);
-                                       ret = add_fd(&buf[i], ((int *)CMSG_DATA(cmsg))[i]);
-                                       if (ret < 0) {
-                                               send_error(KCONSUMERD_OUTFD_ERROR);
-                                               goto end;
-                                       }
-                                       break;
-                               case LTTCOMM_UPDATE_STREAM:
-                                       change_fd_state(buf[i].fd, buf[i].state);
-                                       break;
-                               default:
-                                       break;
+                       case ADD_STREAM:
+                               DBG("add_fd %s (%d)", lkm.path_name, (CMSG_DATA(cmsg)[0]));
+                               ret = add_fd(&lkm, (CMSG_DATA(cmsg)[0]));
+                               if (ret < 0) {
+                                       send_error(KCONSUMERD_OUTFD_ERROR);
+                                       goto end;
+                               }
+                               break;
+                       case UPDATE_STREAM:
+                               change_fd_state(lkm.fd, lkm.state);
+                               break;
+                       default:
+                               break;
                        }
+                       /* flag to tell the polling thread to update its fd array */
+                       update_fd_array = 1;
+                       /* signal the poll thread */
+                       tmp2 = write(poll_pipe[1], "4", 1);
+               } else {
+                       ERR("Didn't received any fd");
+                       send_error(KCONSUMERD_ERROR_RECV_FD);
+                       ret = -1;
+                       goto end;
                }
-               /* flag to tell the polling thread to update its fd array */
-               update_fd_array = 1;
-               /* signal the poll thread */
-               tmp2 = write(poll_pipe[1], "4", 1);
-       } else {
-               ERR("Didn't received any fd");
-               send_error(KCONSUMERD_ERROR_RECV_FD);
-               ret = -1;
-               goto end;
        }
 
 end:
-       if (buf != NULL) {
-               free(buf);
-               buf = NULL;
-       }
+       DBG("consumerd_recv_fd thread exiting");
        return ret;
 }
 
@@ -501,43 +647,54 @@ static void *thread_receive_fds(void *data)
        client_socket = lttcomm_create_unix_sock(command_sock_path);
        if (client_socket < 0) {
                ERR("Cannot create command socket");
-               goto error;
+               goto end;
        }
 
        ret = lttcomm_listen_unix_sock(client_socket);
        if (ret < 0) {
-               goto error;
+               goto end;
        }
 
        DBG("Sending ready command to ltt-sessiond");
        ret = send_error(KCONSUMERD_COMMAND_SOCK_READY);
        if (ret < 0) {
                ERR("Error sending ready command to ltt-sessiond");
-               goto error;
+               goto end;
        }
 
        /* Blocking call, waiting for transmission */
        sock = lttcomm_accept_unix_sock(client_socket);
        if (sock <= 0) {
                WARN("On accept");
-               goto error;
+               goto end;
        }
        while (1) {
                /* We first get the number of fd we are about to receive */
                ret = lttcomm_recv_unix_sock(sock, &tmp,
                                sizeof(struct lttcomm_kconsumerd_header));
                if (ret <= 0) {
-                       ERR("Receiving the lttcomm_kconsumerd_header, exiting");
-                       goto error;
+                       ERR("Communication interrupted on command socket");
+                       goto end;
                }
+               if (tmp.cmd_type == STOP) {
+                       DBG("Received STOP command");
+                       goto end;
+               }
+               /* we received a command to add or update fds */
                ret = consumerd_recv_fd(sock, tmp.payload_size, tmp.cmd_type);
                if (ret <= 0) {
                        ERR("Receiving the FD, exiting");
-                       goto error;
+                       goto end;
                }
        }
 
-error:
+end:
+       DBG("thread_receive_fds exiting");
+       quit = 1;
+       ret = write(poll_pipe[1], "4", 1);
+       if (ret < 0) {
+               perror("poll pipe write");
+       }
        return NULL;
 }
 
@@ -567,8 +724,6 @@ static int update_poll_array(struct pollfd **pollfd,
                        (*pollfd)[i].events = POLLIN | POLLPRI;
                        local_kconsumerd_fd[i] = iter;
                        i++;
-               } else if (iter->state == DELETE_FD) {
-                       del_fd(iter);
                }
        }
        /*
@@ -617,7 +772,7 @@ static void *thread_poll_fds(void *data)
                 * the ltt_fd_list has been updated, we need to update our
                 * local array as well
                 */
-               if (update_fd_array) {
+               if (update_fd_array == 1) {
                        if (pollfd != NULL) {
                                free(pollfd);
                                pollfd = NULL;
@@ -658,6 +813,11 @@ static void *thread_poll_fds(void *data)
                        goto end;
                }
 
+               /* No FDs and quit, cleanup the thread */
+               if (nb_fd == 0 && quit == 1) {
+                       goto end;
+               }
+
                /*
                 * if only the poll_pipe triggered poll to return just return to the
                 * beginning of the loop to update the array
@@ -671,56 +831,61 @@ static void *thread_poll_fds(void *data)
                /* Take care of high priority channels first. */
                for (i = 0; i < nb_fd; i++) {
                        switch(pollfd[i].revents) {
-                               case POLLERR:
-                                       ERR("Error returned in polling fd %d.", pollfd[i].fd);
-                                       num_hup++;
-                                       send_error(KCONSUMERD_POLL_ERROR);
-                                       break;
-                               case POLLHUP:
-                                       ERR("Polling fd %d tells it has hung up.", pollfd[i].fd);
-                                       num_hup++;
-                                       break;
-                               case POLLNVAL:
-                                       ERR("Polling fd %d tells fd is not open.", pollfd[i].fd);
-                                       send_error(KCONSUMERD_POLL_NVAL);
-                                       num_hup++;
-                                       break;
-                               case POLLPRI:
-                                       DBG("Urgent read on fd %d", pollfd[i].fd);
-                                       high_prio = 1;
-                                       ret = read_subbuffer(local_kconsumerd_fd[i]);
-                                       /* it's ok to have an unavailable sub-buffer (FIXME : is it ?) */
-                                       if (ret == EAGAIN) {
-                                               ret = 0;
-                                       }
-                                       break;
+                       case POLLERR:
+                               ERR("Error returned in polling fd %d.", pollfd[i].fd);
+                               del_fd(local_kconsumerd_fd[i]);
+                               update_fd_array = 1;
+                               num_hup++;
+                               break;
+                       case POLLHUP:
+                               ERR("Polling fd %d tells it has hung up.", pollfd[i].fd);
+                               del_fd(local_kconsumerd_fd[i]);
+                               update_fd_array = 1;
+                               num_hup++;
+                               break;
+                       case POLLNVAL:
+                               ERR("Polling fd %d tells fd is not open.", pollfd[i].fd);
+                               del_fd(local_kconsumerd_fd[i]);
+                               update_fd_array = 1;
+                               num_hup++;
+                               break;
+                       case POLLPRI:
+                               DBG("Urgent read on fd %d", pollfd[i].fd);
+                               high_prio = 1;
+                               ret = read_subbuffer(local_kconsumerd_fd[i]);
+                               /* it's ok to have an unavailable sub-buffer (FIXME : is it ?) */
+                               if (ret == EAGAIN) {
+                                       ret = 0;
+                               }
+                               break;
                        }
                }
 
                /* If every buffer FD has hung up, we end the read loop here */
                if (nb_fd > 0 && num_hup == nb_fd) {
                        DBG("every buffer FD has hung up\n");
-                       send_error(KCONSUMERD_POLL_HUP);
-                       goto end;
+                       if (quit == 1) {
+                               goto end;
+                       }
+                       continue;
                }
 
                /* Take care of low priority channels. */
-               if (!high_prio) {
+               if (high_prio == 0) {
                        for (i = 0; i < nb_fd; i++) {
-                               switch(pollfd[i].revents) {
-                                       case POLLIN:
-                                               DBG("Normal read on fd %d", pollfd[i].fd);
-                                               ret = read_subbuffer(local_kconsumerd_fd[i]);
-                                               /* it's ok to have an unavailable subbuffer (FIXME : is it ?) */
-                                               if (ret == EAGAIN) {
-                                                       ret = 0;
-                                               }
-                                               break;
+                               if (pollfd[i].revents == POLLIN) {
+                                       DBG("Normal read on fd %d", pollfd[i].fd);
+                                       ret = read_subbuffer(local_kconsumerd_fd[i]);
+                                       /* it's ok to have an unavailable subbuffer (FIXME : is it ?) */
+                                       if (ret == EAGAIN) {
+                                               ret = 0;
+                                       }
                                }
                        }
                }
        }
 end:
+       DBG("polling thread exiting");
        if (pollfd != NULL) {
                free(pollfd);
                pollfd = NULL;
@@ -781,36 +946,36 @@ static void parse_args(int argc, char **argv)
                }
 
                switch (c) {
-                       case 0:
-                               fprintf(stderr, "option %s", long_options[option_index].name);
-                               if (optarg) {
-                                       fprintf(stderr, " with arg %s\n", optarg);
-                               }
-                               break;
-                       case 'c':
-                               snprintf(command_sock_path, PATH_MAX, "%s", optarg);
-                               break;
-                       case 'e':
-                               snprintf(error_sock_path, PATH_MAX, "%s", optarg);
-                               break;
-                       case 'd':
-                               opt_daemon = 1;
-                               break;
-                       case 'h':
-                               usage();
-                               exit(EXIT_FAILURE);
-                       case 'q':
-                               opt_quiet = 1;
-                               break;
-                       case 'v':
-                               opt_verbose = 1;
-                               break;
-                       case 'V':
-                               fprintf(stdout, "%s\n", VERSION);
-                               exit(EXIT_SUCCESS);
-                       default:
-                               usage();
-                               exit(EXIT_FAILURE);
+               case 0:
+                       fprintf(stderr, "option %s", long_options[option_index].name);
+                       if (optarg) {
+                               fprintf(stderr, " with arg %s\n", optarg);
+                       }
+                       break;
+               case 'c':
+                       snprintf(command_sock_path, PATH_MAX, "%s", optarg);
+                       break;
+               case 'e':
+                       snprintf(error_sock_path, PATH_MAX, "%s", optarg);
+                       break;
+               case 'd':
+                       opt_daemon = 1;
+                       break;
+               case 'h':
+                       usage();
+                       exit(EXIT_FAILURE);
+               case 'q':
+                       opt_quiet = 1;
+                       break;
+               case 'v':
+                       opt_verbose = 1;
+                       break;
+               case 'V':
+                       fprintf(stdout, "%s\n", VERSION);
+                       exit(EXIT_SUCCESS);
+               default:
+                       usage();
+                       exit(EXIT_FAILURE);
                }
        }
 }
This page took 0.032614 seconds and 4 git commands to generate.