X-Git-Url: https://git.lttng.org/?p=lttng-tools.git;a=blobdiff_plain;f=liblttngkconsumerd%2Flttngkconsumerd.c;fp=liblttngkconsumerd%2Flttngkconsumerd.c;h=ba26026c4458cb955ce70a5c2727a8b3b7413e48;hp=0000000000000000000000000000000000000000;hb=6533b585a3a53a0b52c2da14baec5e874d1bf3bb;hpb=57194bf28648ebf2dfbed08f52a6824b5dd726f5 diff --git a/liblttngkconsumerd/lttngkconsumerd.c b/liblttngkconsumerd/lttngkconsumerd.c new file mode 100644 index 000000000..ba26026c4 --- /dev/null +++ b/liblttngkconsumerd/lttngkconsumerd.c @@ -0,0 +1,965 @@ +/* + * Copyright (C) 2011 - Julien Desfossez + * Mathieu Desnoyers + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; only version 2 + * of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "kernelctl.h" +#include "lttngerr.h" +#include "lttng-sessiond-comm.h" + +static struct lttng_kconsumerd_global_data { + /* + * kconsumerd_data.lock protects kconsumerd_data.fd_list, + * kconsumerd_data.fds_count, and kconsumerd_data.need_update. It ensures + * the count matches the number of items in the fd_list. It ensures the + * list updates *always* trigger an fd_array update (therefore need to make + * list update vs kconsumerd_data.need_update flag update atomic, and also + * flag read, fd array and flag clear atomic). + */ + pthread_mutex_t lock; + /* + * Number of element for the list below. Protected by kconsumerd_data.lock. + */ + unsigned int fds_count; + /* + * List of FDs. Protected by kconsumerd_data.lock. + */ + struct lttng_kconsumerd_fd_list fd_list; + /* + * Flag specifying if the local array of FDs needs update in the poll + * function. Protected by kconsumerd_data.lock. + */ + unsigned int need_update; +} kconsumerd_data = { + .fd_list.head = CDS_LIST_HEAD_INIT(kconsumerd_data.fd_list.head), + .fds_count = 0, + .need_update = 1, +}; + +/* timeout parameter, to control the polling thread grace period. */ +static int kconsumerd_poll_timeout = -1; + +/* + * Flag to inform the polling thread to quit when all fd hung up. Updated by + * the kconsumerd_thread_receive_fds when it notices that all fds has hung up. + * Also updated by the signal handler (kconsumerd_should_exit()). Read by the + * polling threads. + */ +static volatile int kconsumerd_quit = 0; + +/* + * Find a session fd in the global list. The kconsumerd_data.lock must be + * locked during this call. + * + * Return 1 if found else 0. + */ +static int kconsumerd_find_session_fd(int fd) +{ + struct lttng_kconsumerd_fd *iter; + + cds_list_for_each_entry(iter, &kconsumerd_data.fd_list.head, list) { + if (iter->sessiond_fd == fd) { + DBG("Duplicate session fd %d", fd); + return 1; + } + } + + return 0; +} + +/* + * Remove a fd from the global list protected by a mutex. 
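+ * kconsumerd_data.lock is taken internally; the element's mmap region is
+ * unmapped, its out_fd and consumerd_fd are closed, and the element is freed.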
+ */ +static void kconsumerd_del_fd(struct lttng_kconsumerd_fd *lcf) +{ + int ret; + pthread_mutex_lock(&kconsumerd_data.lock); + cds_list_del(&lcf->list); + if (kconsumerd_data.fds_count > 0) { + kconsumerd_data.fds_count--; + if (lcf != NULL) { + if (lcf->mmap_base != NULL) { + ret = munmap(lcf->mmap_base, lcf->mmap_len); + if (ret != 0) { + perror("munmap"); + } + } + if (lcf->out_fd != 0) { + close(lcf->out_fd); + } + close(lcf->consumerd_fd); + free(lcf); + lcf = NULL; + } + } + kconsumerd_data.need_update = 1; + pthread_mutex_unlock(&kconsumerd_data.lock); +} + +/* + * Add a fd to the global list protected by a mutex. + */ +static int kconsumerd_add_fd(struct lttcomm_kconsumerd_msg *buf, + int consumerd_fd) +{ + struct lttng_kconsumerd_fd *tmp_fd; + int ret = 0; + + pthread_mutex_lock(&kconsumerd_data.lock); + /* Check if already exist */ + ret = kconsumerd_find_session_fd(buf->fd); + if (ret == 1) { + goto end; + } + + tmp_fd = malloc(sizeof(struct lttng_kconsumerd_fd)); + tmp_fd->sessiond_fd = buf->fd; + tmp_fd->consumerd_fd = consumerd_fd; + tmp_fd->state = buf->state; + tmp_fd->max_sb_size = buf->max_sb_size; + tmp_fd->out_fd = 0; + tmp_fd->out_fd_offset = 0; + tmp_fd->mmap_len = 0; + tmp_fd->mmap_base = NULL; + tmp_fd->output = buf->output; + strncpy(tmp_fd->path_name, buf->path_name, PATH_MAX); + tmp_fd->path_name[PATH_MAX - 1] = '\0'; + + /* Opening the tracefile in write mode */ + if (tmp_fd->path_name != NULL) { + ret = open(tmp_fd->path_name, + O_WRONLY|O_CREAT|O_TRUNC, S_IRWXU|S_IRWXG|S_IRWXO); + if (ret < 0) { + ERR("Opening %s", tmp_fd->path_name); + perror("open"); + goto end; + } + tmp_fd->out_fd = ret; + DBG("Adding %s (%d, %d, %d)", tmp_fd->path_name, + tmp_fd->sessiond_fd, tmp_fd->consumerd_fd, tmp_fd->out_fd); + } + + if (tmp_fd->output == LTTNG_EVENT_MMAP) { + /* get the len of the mmap region */ + ret = kernctl_get_mmap_len(tmp_fd->consumerd_fd, &tmp_fd->mmap_len); + if (ret != 0) { + ret = errno; + perror("kernctl_get_mmap_len"); + goto end; + } + + tmp_fd->mmap_base = mmap(NULL, tmp_fd->mmap_len, + PROT_READ, MAP_PRIVATE, tmp_fd->consumerd_fd, 0); + if (tmp_fd->mmap_base == MAP_FAILED) { + perror("Error mmaping"); + ret = -1; + goto end; + } + } + + cds_list_add(&tmp_fd->list, &kconsumerd_data.fd_list.head); + kconsumerd_data.fds_count++; + kconsumerd_data.need_update = 1; +end: + pthread_mutex_unlock(&kconsumerd_data.lock); + return ret; +} + +/* + * Update a fd according to what we just received. + */ +static void kconsumerd_change_fd_state(int sessiond_fd, + enum lttng_kconsumerd_fd_state state) +{ + struct lttng_kconsumerd_fd *iter; + + pthread_mutex_lock(&kconsumerd_data.lock); + cds_list_for_each_entry(iter, &kconsumerd_data.fd_list.head, list) { + if (iter->sessiond_fd == sessiond_fd) { + iter->state = state; + break; + } + } + kconsumerd_data.need_update = 1; + pthread_mutex_unlock(&kconsumerd_data.lock); +} + +/* + * Allocate the pollfd structure and the local view of the out fds to avoid + * doing a lookup in the linked list and concurrency issues when writing is + * needed. Called with kconsumerd_data.lock held. + * + * Returns the number of fds in the structures. 
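+ * The read end of kconsumerd_poll_pipe is appended as one extra entry after
+ * the active fds but is not included in the returned count.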
+ */ +static int kconsumerd_update_poll_array( + struct lttng_kconsumerd_local_data *ctx, struct pollfd **pollfd, + struct lttng_kconsumerd_fd **local_kconsumerd_fd) +{ + struct lttng_kconsumerd_fd *iter; + int i = 0; + + DBG("Updating poll fd array"); + cds_list_for_each_entry(iter, &kconsumerd_data.fd_list.head, list) { + if (iter->state == ACTIVE_FD) { + DBG("Active FD %d", iter->consumerd_fd); + (*pollfd)[i].fd = iter->consumerd_fd; + (*pollfd)[i].events = POLLIN | POLLPRI; + local_kconsumerd_fd[i] = iter; + i++; + } + } + + /* + * Insert the kconsumerd_poll_pipe at the end of the array and don't + * increment i so nb_fd is the number of real FD. + */ + (*pollfd)[i].fd = ctx->kconsumerd_poll_pipe[0]; + (*pollfd)[i].events = POLLIN; + return i; +} + +/* + * Receives an array of file descriptors and the associated structures + * describing each fd (path name). + * + * Returns the size of received data + */ +static int kconsumerd_consumerd_recv_fd( + struct lttng_kconsumerd_local_data *ctx, int sfd, + struct pollfd *kconsumerd_sockpoll, int size, + enum lttng_kconsumerd_command cmd_type) +{ + struct iovec iov[1]; + int ret = 0, i, tmp2; + struct cmsghdr *cmsg; + int nb_fd; + char recv_fd[CMSG_SPACE(sizeof(int))]; + struct lttcomm_kconsumerd_msg lkm; + + /* the number of fds we are about to receive */ + nb_fd = size / sizeof(struct lttcomm_kconsumerd_msg); + + /* + * nb_fd is the number of fds we receive. One fd per recvmsg. + */ + for (i = 0; i < nb_fd; i++) { + struct msghdr msg = { 0 }; + + /* Prepare to receive the structures */ + iov[0].iov_base = &lkm; + iov[0].iov_len = sizeof(lkm); + msg.msg_iov = iov; + msg.msg_iovlen = 1; + + msg.msg_control = recv_fd; + msg.msg_controllen = sizeof(recv_fd); + + DBG("Waiting to receive fd"); + if (lttng_kconsumerd_poll_socket(kconsumerd_sockpoll) < 0) { + goto end; + } + + if ((ret = recvmsg(sfd, &msg, 0)) < 0) { + perror("recvmsg"); + continue; + } + + if (ret != (size / nb_fd)) { + ERR("Received only %d, expected %d", ret, size); + lttng_kconsumerd_send_error(ctx, KCONSUMERD_ERROR_RECV_FD); + goto end; + } + + cmsg = CMSG_FIRSTHDR(&msg); + if (!cmsg) { + ERR("Invalid control message header"); + ret = -1; + lttng_kconsumerd_send_error(ctx, KCONSUMERD_ERROR_RECV_FD); + goto end; + } + + /* if we received fds */ + if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) { + switch (cmd_type) { + case ADD_STREAM: + DBG("kconsumerd_add_fd %s (%d)", lkm.path_name, + ((int *) CMSG_DATA(cmsg))[0]); + + ret = kconsumerd_add_fd(&lkm, ((int *) CMSG_DATA(cmsg))[0]); + if (ret < 0) { + lttng_kconsumerd_send_error(ctx, KCONSUMERD_OUTFD_ERROR); + goto end; + } + break; + case UPDATE_STREAM: + kconsumerd_change_fd_state(lkm.fd, lkm.state); + break; + default: + break; + } + /* signal the poll thread */ + tmp2 = write(ctx->kconsumerd_poll_pipe[1], "4", 1); + if (tmp2 < 0) { + perror("write kconsumerd poll"); + } + } else { + ERR("Didn't received any fd"); + lttng_kconsumerd_send_error(ctx, KCONSUMERD_ERROR_RECV_FD); + ret = -1; + goto end; + } + } + +end: + return ret; +} + +/* + * Set the error socket. + */ +void lttng_kconsumerd_set_error_sock( + struct lttng_kconsumerd_local_data *ctx, int sock) +{ + ctx->kconsumerd_error_socket = sock; +} + +/* + * Set the command socket path. + */ + +void lttng_kconsumerd_set_command_sock_path( + struct lttng_kconsumerd_local_data *ctx, char *sock) +{ + ctx->kconsumerd_command_sock_path = sock; +} + +/* + * Mmap the ring buffer, read it and write the data to the tracefile. 
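+ * The region must already be mapped (kconsumerd_fd->mmap_base is set up in
+ * kconsumerd_add_fd); only the read offset lookup, the write to the tracefile
+ * and the page-cache flushing are done here.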
+ * + * Returns the number of bytes written + */ +int lttng_kconsumerd_on_read_subbuffer_mmap( + struct lttng_kconsumerd_local_data *ctx, + struct lttng_kconsumerd_fd *kconsumerd_fd, unsigned long len) +{ + unsigned long mmap_offset; + char *padding = NULL; + long ret = 0; + off_t orig_offset = kconsumerd_fd->out_fd_offset; + int fd = kconsumerd_fd->consumerd_fd; + int outfd = kconsumerd_fd->out_fd; + + /* get the offset inside the fd to mmap */ + ret = kernctl_get_mmap_read_offset(fd, &mmap_offset); + if (ret != 0) { + ret = errno; + perror("kernctl_get_mmap_read_offset"); + goto end; + } + + while (len > 0) { + ret = write(outfd, kconsumerd_fd->mmap_base + mmap_offset, len); + if (ret >= len) { + len = 0; + } else if (ret < 0) { + ret = errno; + perror("Error in file write"); + goto end; + } + /* This won't block, but will start writeout asynchronously */ + sync_file_range(outfd, kconsumerd_fd->out_fd_offset, ret, + SYNC_FILE_RANGE_WRITE); + kconsumerd_fd->out_fd_offset += ret; + } + + /* + * This does a blocking write-and-wait on any page that belongs to the + * subbuffer prior to the one we just wrote. + * Don't care about error values, as these are just hints and ways to + * limit the amount of page cache used. + */ + if (orig_offset >= kconsumerd_fd->max_sb_size) { + sync_file_range(outfd, orig_offset - kconsumerd_fd->max_sb_size, + kconsumerd_fd->max_sb_size, + SYNC_FILE_RANGE_WAIT_BEFORE + | SYNC_FILE_RANGE_WRITE + | SYNC_FILE_RANGE_WAIT_AFTER); + + /* + * Give hints to the kernel about how we access the file: + * POSIX_FADV_DONTNEED : we won't re-access data in a near future after + * we write it. + * + * We need to call fadvise again after the file grows because the + * kernel does not seem to apply fadvise to non-existing parts of the + * file. + * + * Call fadvise _after_ having waited for the page writeback to + * complete because the dirty page writeback semantic is not well + * defined. So it can be expected to lead to lower throughput in + * streaming. + */ + posix_fadvise(outfd, orig_offset - kconsumerd_fd->max_sb_size, + kconsumerd_fd->max_sb_size, POSIX_FADV_DONTNEED); + } + goto end; + +end: + if (padding != NULL) { + free(padding); + } + return ret; +} + +/* + * Splice the data from the ring buffer to the tracefile. + * + * Returns the number of bytes spliced. 
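+ * Data moves in two hops: from the channel fd into kconsumerd_thread_pipe,
+ * then from the pipe to the tracefile, both with SPLICE_F_MOVE | SPLICE_F_MORE.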
+ */ +int lttng_kconsumerd_on_read_subbuffer_splice( + struct lttng_kconsumerd_local_data *ctx, + struct lttng_kconsumerd_fd *kconsumerd_fd, unsigned long len) +{ + long ret = 0; + loff_t offset = 0; + off_t orig_offset = kconsumerd_fd->out_fd_offset; + int fd = kconsumerd_fd->consumerd_fd; + int outfd = kconsumerd_fd->out_fd; + + while (len > 0) { + DBG("splice chan to pipe offset %lu (fd : %d)", + (unsigned long)offset, fd); + ret = splice(fd, &offset, ctx->kconsumerd_thread_pipe[1], NULL, len, + SPLICE_F_MOVE | SPLICE_F_MORE); + DBG("splice chan to pipe ret %ld", ret); + if (ret < 0) { + ret = errno; + perror("Error in relay splice"); + goto splice_error; + } + + ret = splice(ctx->kconsumerd_thread_pipe[0], NULL, outfd, NULL, ret, + SPLICE_F_MOVE | SPLICE_F_MORE); + DBG("splice pipe to file %ld", ret); + if (ret < 0) { + ret = errno; + perror("Error in file splice"); + goto splice_error; + } + if (ret >= len) { + len = 0; + } + /* This won't block, but will start writeout asynchronously */ + sync_file_range(outfd, kconsumerd_fd->out_fd_offset, ret, + SYNC_FILE_RANGE_WRITE); + kconsumerd_fd->out_fd_offset += ret; + } + + /* + * This does a blocking write-and-wait on any page that belongs to the + * subbuffer prior to the one we just wrote. + * Don't care about error values, as these are just hints and ways to + * limit the amount of page cache used. + */ + if (orig_offset >= kconsumerd_fd->max_sb_size) { + sync_file_range(outfd, orig_offset - kconsumerd_fd->max_sb_size, + kconsumerd_fd->max_sb_size, + SYNC_FILE_RANGE_WAIT_BEFORE + | SYNC_FILE_RANGE_WRITE + | SYNC_FILE_RANGE_WAIT_AFTER); + /* + * Give hints to the kernel about how we access the file: + * POSIX_FADV_DONTNEED : we won't re-access data in a near future after + * we write it. + * + * We need to call fadvise again after the file grows because the + * kernel does not seem to apply fadvise to non-existing parts of the + * file. + * + * Call fadvise _after_ having waited for the page writeback to + * complete because the dirty page writeback semantic is not well + * defined. So it can be expected to lead to lower throughput in + * streaming. + */ + posix_fadvise(outfd, orig_offset - kconsumerd_fd->max_sb_size, + kconsumerd_fd->max_sb_size, POSIX_FADV_DONTNEED); + } + goto end; + +splice_error: + /* send the appropriate error description to sessiond */ + switch(ret) { + case EBADF: + lttng_kconsumerd_send_error(ctx, KCONSUMERD_SPLICE_EBADF); + break; + case EINVAL: + lttng_kconsumerd_send_error(ctx, KCONSUMERD_SPLICE_EINVAL); + break; + case ENOMEM: + lttng_kconsumerd_send_error(ctx, KCONSUMERD_SPLICE_ENOMEM); + break; + case ESPIPE: + lttng_kconsumerd_send_error(ctx, KCONSUMERD_SPLICE_ESPIPE); + break; + } + +end: + return ret; +} + +/* + * Poll on the should_quit pipe and the command socket return -1 on error and + * should exit, 0 if data is available on the command socket + */ +int lttng_kconsumerd_poll_socket(struct pollfd *kconsumerd_sockpoll) +{ + int num_rdy; + + num_rdy = poll(kconsumerd_sockpoll, 2, -1); + if (num_rdy == -1) { + perror("Poll error"); + goto exit; + } + if (kconsumerd_sockpoll[0].revents == POLLIN) { + DBG("kconsumerd_should_quit wake up"); + goto exit; + } + return 0; + +exit: + return -1; +} + +/* + * This thread polls the fds in the ltt_fd_list to consume the data and write + * it to tracefile if necessary. 
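+ * Urgent (POLLPRI) data is serviced before normal (POLLIN) reads, and a byte
+ * written to kconsumerd_poll_pipe makes the thread rebuild its local fd array.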
+ */ +void *lttng_kconsumerd_thread_poll_fds(void *data) +{ + int num_rdy, num_hup, high_prio, ret, i; + struct pollfd *pollfd = NULL; + /* local view of the fds */ + struct lttng_kconsumerd_fd **local_kconsumerd_fd = NULL; + /* local view of kconsumerd_data.fds_count */ + int nb_fd = 0; + char tmp; + int tmp2; + struct lttng_kconsumerd_local_data *ctx = data; + + + local_kconsumerd_fd = malloc(sizeof(struct lttng_kconsumerd_fd)); + + while (1) { + high_prio = 0; + num_hup = 0; + + /* + * the ltt_fd_list has been updated, we need to update our + * local array as well + */ + pthread_mutex_lock(&kconsumerd_data.lock); + if (kconsumerd_data.need_update) { + if (pollfd != NULL) { + free(pollfd); + pollfd = NULL; + } + if (local_kconsumerd_fd != NULL) { + free(local_kconsumerd_fd); + local_kconsumerd_fd = NULL; + } + + /* allocate for all fds + 1 for the kconsumerd_poll_pipe */ + pollfd = malloc((kconsumerd_data.fds_count + 1) * sizeof(struct pollfd)); + if (pollfd == NULL) { + perror("pollfd malloc"); + pthread_mutex_unlock(&kconsumerd_data.lock); + goto end; + } + + /* allocate for all fds + 1 for the kconsumerd_poll_pipe */ + local_kconsumerd_fd = malloc((kconsumerd_data.fds_count + 1) * + sizeof(struct lttng_kconsumerd_fd)); + if (local_kconsumerd_fd == NULL) { + perror("local_kconsumerd_fd malloc"); + pthread_mutex_unlock(&kconsumerd_data.lock); + goto end; + } + ret = kconsumerd_update_poll_array(ctx, &pollfd, local_kconsumerd_fd); + if (ret < 0) { + ERR("Error in allocating pollfd or local_outfds"); + lttng_kconsumerd_send_error(ctx, KCONSUMERD_POLL_ERROR); + pthread_mutex_unlock(&kconsumerd_data.lock); + goto end; + } + nb_fd = ret; + kconsumerd_data.need_update = 0; + } + pthread_mutex_unlock(&kconsumerd_data.lock); + + /* poll on the array of fds */ + DBG("polling on %d fd", nb_fd + 1); + num_rdy = poll(pollfd, nb_fd + 1, kconsumerd_poll_timeout); + DBG("poll num_rdy : %d", num_rdy); + if (num_rdy == -1) { + perror("Poll error"); + lttng_kconsumerd_send_error(ctx, KCONSUMERD_POLL_ERROR); + goto end; + } else if (num_rdy == 0) { + DBG("Polling thread timed out"); + goto end; + } + + /* No FDs and kconsumerd_quit, kconsumerd_cleanup the thread */ + if (nb_fd == 0 && kconsumerd_quit == 1) { + goto end; + } + + /* + * If the kconsumerd_poll_pipe triggered poll go + * directly to the beginning of the loop to update the + * array. We want to prioritize array update over + * low-priority reads. + */ + if (pollfd[nb_fd].revents == POLLIN) { + DBG("kconsumerd_poll_pipe wake up"); + tmp2 = read(ctx->kconsumerd_poll_pipe[0], &tmp, 1); + if (tmp2 < 0) { + perror("read kconsumerd poll"); + } + continue; + } + + /* Take care of high priority channels first. 
*/ + for (i = 0; i < nb_fd; i++) { + switch(pollfd[i].revents) { + case POLLERR: + ERR("Error returned in polling fd %d.", pollfd[i].fd); + kconsumerd_del_fd(local_kconsumerd_fd[i]); + num_hup++; + break; + case POLLHUP: + DBG("Polling fd %d tells it has hung up.", pollfd[i].fd); + kconsumerd_del_fd(local_kconsumerd_fd[i]); + num_hup++; + break; + case POLLNVAL: + ERR("Polling fd %d tells fd is not open.", pollfd[i].fd); + kconsumerd_del_fd(local_kconsumerd_fd[i]); + num_hup++; + break; + case POLLPRI: + DBG("Urgent read on fd %d", pollfd[i].fd); + high_prio = 1; + ret = ctx->on_buffer_ready(local_kconsumerd_fd[i]); + /* it's ok to have an unavailable sub-buffer */ + if (ret == EAGAIN) { + ret = 0; + } + break; + } + } + + /* If every buffer FD has hung up, we end the read loop here */ + if (nb_fd > 0 && num_hup == nb_fd) { + DBG("every buffer FD has hung up\n"); + if (kconsumerd_quit == 1) { + goto end; + } + continue; + } + + /* Take care of low priority channels. */ + if (high_prio == 0) { + for (i = 0; i < nb_fd; i++) { + if (pollfd[i].revents == POLLIN) { + DBG("Normal read on fd %d", pollfd[i].fd); + ret = ctx->on_buffer_ready(local_kconsumerd_fd[i]); + /* it's ok to have an unavailable subbuffer */ + if (ret == EAGAIN) { + ret = 0; + } + } + } + } + } +end: + DBG("polling thread exiting"); + if (pollfd != NULL) { + free(pollfd); + pollfd = NULL; + } + if (local_kconsumerd_fd != NULL) { + free(local_kconsumerd_fd); + local_kconsumerd_fd = NULL; + } + return NULL; +} + +/* + * Initialise the necessary environnement : + * - create a new context + * - create the poll_pipe + * - create the should_quit pipe (for signal handler) + * - create the thread pipe (for splice) + * + * Takes a function pointer as argument, this function is called when data is + * available on a buffer. This function is responsible to do the + * kernctl_get_next_subbuf, read the data with mmap or splice depending on the + * buffer configuration and then kernctl_put_next_subbuf at the end. + * + * Returns a pointer to the new context or NULL on error. + */ +struct lttng_kconsumerd_local_data *lttng_kconsumerd_create( + int (*buffer_ready)(struct lttng_kconsumerd_fd *kconsumerd_fd)) +{ + int ret; + struct lttng_kconsumerd_local_data *ctx; + + ctx = malloc(sizeof(struct lttng_kconsumerd_local_data)); + if (ctx == NULL) { + perror("allocating context"); + goto end; + } + + ctx->on_buffer_ready = buffer_ready; + + ret = pipe(ctx->kconsumerd_poll_pipe); + if (ret < 0) { + perror("Error creating poll pipe"); + ctx = NULL; + goto end; + } + + ret = pipe(ctx->kconsumerd_should_quit); + if (ret < 0) { + perror("Error creating recv pipe"); + ctx = NULL; + goto end; + } + + ret = pipe(ctx->kconsumerd_thread_pipe); + if (ret < 0) { + perror("Error creating thread pipe"); + ctx = NULL; + goto end; + } + +end: + return ctx; +} + +/* + * Close all fds associated with the instance and free the context. + */ +void lttng_kconsumerd_destroy(struct lttng_kconsumerd_local_data *ctx) +{ + close(ctx->kconsumerd_error_socket); + close(ctx->kconsumerd_thread_pipe[0]); + close(ctx->kconsumerd_thread_pipe[1]); + close(ctx->kconsumerd_poll_pipe[0]); + close(ctx->kconsumerd_poll_pipe[1]); + close(ctx->kconsumerd_should_quit[0]); + close(ctx->kconsumerd_should_quit[1]); + unlink(ctx->kconsumerd_command_sock_path); + free(ctx); + ctx = NULL; +} + +/* + * This thread listens on the consumerd socket and receives the file + * descriptors from the session daemon. 
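+ * Each command starts with a struct lttcomm_kconsumerd_header giving the
+ * command type and payload size; the stream fds themselves arrive as
+ * SCM_RIGHTS ancillary data handled by kconsumerd_consumerd_recv_fd().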
+ */ +void *lttng_kconsumerd_thread_receive_fds(void *data) +{ + int sock, client_socket, ret; + struct lttcomm_kconsumerd_header tmp; + /* + * structure to poll for incoming data on communication socket avoids + * making blocking sockets. + */ + struct pollfd kconsumerd_sockpoll[2]; + struct lttng_kconsumerd_local_data *ctx = data; + + + DBG("Creating command socket %s", ctx->kconsumerd_command_sock_path); + unlink(ctx->kconsumerd_command_sock_path); + client_socket = lttcomm_create_unix_sock(ctx->kconsumerd_command_sock_path); + if (client_socket < 0) { + ERR("Cannot create command socket"); + goto end; + } + + ret = lttcomm_listen_unix_sock(client_socket); + if (ret < 0) { + goto end; + } + + DBG("Sending ready command to ltt-sessiond"); + ret = lttng_kconsumerd_send_error(ctx, KCONSUMERD_COMMAND_SOCK_READY); + if (ret < 0) { + ERR("Error sending ready command to ltt-sessiond"); + goto end; + } + + ret = fcntl(client_socket, F_SETFL, O_NONBLOCK); + if (ret < 0) { + perror("fcntl O_NONBLOCK"); + goto end; + } + + /* prepare the FDs to poll : to client socket and the should_quit pipe */ + kconsumerd_sockpoll[0].fd = ctx->kconsumerd_should_quit[0]; + kconsumerd_sockpoll[0].events = POLLIN | POLLPRI; + kconsumerd_sockpoll[1].fd = client_socket; + kconsumerd_sockpoll[1].events = POLLIN | POLLPRI; + + if (lttng_kconsumerd_poll_socket(kconsumerd_sockpoll) < 0) { + goto end; + } + DBG("Connection on client_socket"); + + /* Blocking call, waiting for transmission */ + sock = lttcomm_accept_unix_sock(client_socket); + if (sock <= 0) { + WARN("On accept"); + goto end; + } + ret = fcntl(sock, F_SETFL, O_NONBLOCK); + if (ret < 0) { + perror("fcntl O_NONBLOCK"); + goto end; + } + + /* update the polling structure to poll on the established socket */ + kconsumerd_sockpoll[1].fd = sock; + kconsumerd_sockpoll[1].events = POLLIN | POLLPRI; + + while (1) { + if (lttng_kconsumerd_poll_socket(kconsumerd_sockpoll) < 0) { + goto end; + } + DBG("Incoming fds on sock"); + + /* We first get the number of fd we are about to receive */ + ret = lttcomm_recv_unix_sock(sock, &tmp, + sizeof(struct lttcomm_kconsumerd_header)); + if (ret <= 0) { + ERR("Communication interrupted on command socket"); + goto end; + } + if (tmp.cmd_type == STOP) { + DBG("Received STOP command"); + goto end; + } + if (kconsumerd_quit) { + DBG("kconsumerd_thread_receive_fds received quit from signal"); + goto end; + } + + /* we received a command to add or update fds */ + ret = kconsumerd_consumerd_recv_fd(ctx, sock, kconsumerd_sockpoll, + tmp.payload_size, tmp.cmd_type); + if (ret < 0) { + ERR("Receiving the FD, exiting"); + goto end; + } + DBG("received fds on sock"); + } + +end: + DBG("kconsumerd_thread_receive_fds exiting"); + + /* + * when all fds have hung up, the polling thread + * can exit cleanly + */ + kconsumerd_quit = 1; + + /* + * 2s of grace period, if no polling events occur during + * this period, the polling thread will exit even if there + * are still open FDs (should not happen, but safety mechanism). + */ + kconsumerd_poll_timeout = LTTNG_KCONSUMERD_POLL_GRACE_PERIOD; + + /* wake up the polling thread */ + ret = write(ctx->kconsumerd_poll_pipe[1], "4", 1); + if (ret < 0) { + perror("poll pipe write"); + } + return NULL; +} + +/* + * Close all the tracefiles and stream fds, should be called when all instances + * are destroyed. + */ +void lttng_kconsumerd_cleanup(void) +{ + struct lttng_kconsumerd_fd *iter, *tmp; + + /* + * close all outfd. 
Called when there are no more threads
+	 * running (after joining on the threads), no need to protect
+	 * list iteration with mutex.
+	 */
+	cds_list_for_each_entry_safe(iter, tmp,
+			&kconsumerd_data.fd_list.head, list) {
+		kconsumerd_del_fd(iter);
+	}
+}
+
+/*
+ * Called from signal handler.
+ */
+void lttng_kconsumerd_should_exit(struct lttng_kconsumerd_local_data *ctx)
+{
+	int ret;
+	kconsumerd_quit = 1;
+	ret = write(ctx->kconsumerd_should_quit[1], "4", 1);
+	if (ret < 0) {
+		perror("write kconsumerd quit");
+	}
+}
+
+/*
+ * Send return code to the session daemon.
+ */
+int lttng_kconsumerd_send_error(
+		struct lttng_kconsumerd_local_data *ctx, int cmd)
+{
+	if (ctx->kconsumerd_error_socket > 0) {
+		return lttcomm_send_unix_sock(ctx->kconsumerd_error_socket, &cmd,
+				sizeof(enum lttcomm_sessiond_command));
+	}
+
+	return 0;
+}
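
The comments above describe the intended lifecycle: create a context with a buffer-ready callback, set the sockets, run the receive-fds and poll threads, then clean up. What follows is a minimal, hedged sketch of how a consumer binary might wire that together; it is not part of the patch. The header name, the socket path and the on_buffer_ready() stub are assumptions, and a real callback would, as the lttng_kconsumerd_create() comment states, call kernctl_get_next_subbuf(), consume the data with the mmap or splice helper above, and finish with kernctl_put_next_subbuf().

/* Sketch only: header name, path and stub callback are assumed. */
#include <pthread.h>
#include <stdlib.h>

#include "lttngkconsumerd.h"	/* assumed name of this library's header */

static int on_buffer_ready(struct lttng_kconsumerd_fd *kconsumerd_fd)
{
	/*
	 * Hypothetical stub: a real callback would take the next sub-buffer,
	 * consume it with lttng_kconsumerd_on_read_subbuffer_mmap() or
	 * _splice() depending on kconsumerd_fd->output, then release it.
	 */
	return 0;
}

int main(void)
{
	struct lttng_kconsumerd_local_data *ctx;
	pthread_t fd_thread, poll_thread;

	ctx = lttng_kconsumerd_create(on_buffer_ready);
	if (ctx == NULL) {
		return EXIT_FAILURE;
	}

	/* Illustrative path; the session daemon dictates the real one. */
	lttng_kconsumerd_set_command_sock_path(ctx, "/tmp/kconsumerd-command.sock");

	/*
	 * A real consumer would connect to the session daemon's error socket
	 * and register it here; -1 disables error reporting in this sketch.
	 */
	lttng_kconsumerd_set_error_sock(ctx, -1);

	/* One thread receives stream fds, the other polls and consumes them. */
	pthread_create(&fd_thread, NULL, lttng_kconsumerd_thread_receive_fds, ctx);
	pthread_create(&poll_thread, NULL, lttng_kconsumerd_thread_poll_fds, ctx);

	pthread_join(fd_thread, NULL);
	pthread_join(poll_thread, NULL);

	lttng_kconsumerd_cleanup();
	lttng_kconsumerd_destroy(ctx);
	return EXIT_SUCCESS;
}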