kconsumerd: fix infinite loop in splice handling of subbuf larger than 4k
[lttng-tools.git] / liblttngkconsumerd / lttngkconsumerd.c
1 /*
2 * Copyright (C) 2011 - Julien Desfossez <julien.desfossez@polymtl.ca>
3 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; only version 2
8 * of the License.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 */
19
20 #define _GNU_SOURCE
21 #include <fcntl.h>
22 #include <poll.h>
23 #include <pthread.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <sys/mman.h>
27 #include <sys/socket.h>
28 #include <sys/types.h>
29 #include <unistd.h>
30 #include <urcu/list.h>
31
32 #include <lttng/lttng-kconsumerd.h>
33
34 #include "kernelctl.h"
35 #include "lttngerr.h"
36 #include "lttng-sessiond-comm.h"
37
38 static struct lttng_kconsumerd_global_data {
39 /*
40 * kconsumerd_data.lock protects kconsumerd_data.fd_list,
41 * kconsumerd_data.fds_count, and kconsumerd_data.need_update. It ensures
42 * the count matches the number of items in the fd_list. It ensures the
43 * list updates *always* trigger an fd_array update (therefore need to make
44 * list update vs kconsumerd_data.need_update flag update atomic, and also
45 * flag read, fd array and flag clear atomic).
46 */
47 pthread_mutex_t lock;
48 /*
49 * Number of element for the list below. Protected by kconsumerd_data.lock.
50 */
51 unsigned int fds_count;
52 /*
53 * List of FDs. Protected by kconsumerd_data.lock.
54 */
55 struct lttng_kconsumerd_fd_list fd_list;
56 /*
57 * Flag specifying if the local array of FDs needs update in the poll
58 * function. Protected by kconsumerd_data.lock.
59 */
60 unsigned int need_update;
61 } kconsumerd_data = {
62 .fd_list.head = CDS_LIST_HEAD_INIT(kconsumerd_data.fd_list.head),
63 .fds_count = 0,
64 .need_update = 1,
65 };
66
/*
 * Timeout passed to poll() by the polling thread. It starts at -1 and is
 * replaced by LTTNG_KCONSUMERD_POLL_GRACE_PERIOD when the receiving thread
 * exits, which gives the polling thread a bounded grace period.
 */
static int kconsumerd_poll_timeout = -1;
69
/*
 * Flag telling the polling thread to quit once all fds have hung up. Set by
 * lttng_kconsumerd_thread_receive_fds() when it exits and by
 * lttng_kconsumerd_should_exit() (called from the signal handler). Read by
 * the polling thread and by the receiving thread.
 */
static volatile int kconsumerd_quit = 0;
77
78 /*
79 * Find a session fd in the global list. The kconsumerd_data.lock must be
80 * locked during this call.
81 *
82 * Return 1 if found else 0.
83 */
84 static int kconsumerd_find_session_fd(int fd)
85 {
86 struct lttng_kconsumerd_fd *iter;
87
88 cds_list_for_each_entry(iter, &kconsumerd_data.fd_list.head, list) {
89 if (iter->sessiond_fd == fd) {
90 DBG("Duplicate session fd %d", fd);
91 return 1;
92 }
93 }
94
95 return 0;
96 }
97
98 /*
99 * Remove a fd from the global list protected by a mutex.
100 */
101 static void kconsumerd_del_fd(struct lttng_kconsumerd_fd *lcf)
102 {
103 int ret;
104 pthread_mutex_lock(&kconsumerd_data.lock);
105 cds_list_del(&lcf->list);
106 if (kconsumerd_data.fds_count > 0) {
107 kconsumerd_data.fds_count--;
108 if (lcf != NULL) {
109 if (lcf->mmap_base != NULL) {
110 ret = munmap(lcf->mmap_base, lcf->mmap_len);
111 if (ret != 0) {
112 perror("munmap");
113 }
114 }
115 if (lcf->out_fd != 0) {
116 close(lcf->out_fd);
117 }
118 close(lcf->consumerd_fd);
119 free(lcf);
120 lcf = NULL;
121 }
122 }
123 kconsumerd_data.need_update = 1;
124 pthread_mutex_unlock(&kconsumerd_data.lock);
125 }
126
127 /*
128 * Add a fd to the global list protected by a mutex.
129 */
130 static int kconsumerd_add_fd(struct lttcomm_kconsumerd_msg *buf,
131 int consumerd_fd)
132 {
133 struct lttng_kconsumerd_fd *tmp_fd;
134 int ret = 0;
135
136 pthread_mutex_lock(&kconsumerd_data.lock);
137 /* Check if already exist */
138 ret = kconsumerd_find_session_fd(buf->fd);
139 if (ret == 1) {
140 goto end;
141 }
142
143 tmp_fd = malloc(sizeof(struct lttng_kconsumerd_fd));
144 tmp_fd->sessiond_fd = buf->fd;
145 tmp_fd->consumerd_fd = consumerd_fd;
146 tmp_fd->state = buf->state;
147 tmp_fd->max_sb_size = buf->max_sb_size;
148 tmp_fd->out_fd = 0;
149 tmp_fd->out_fd_offset = 0;
150 tmp_fd->mmap_len = 0;
151 tmp_fd->mmap_base = NULL;
152 tmp_fd->output = buf->output;
153 strncpy(tmp_fd->path_name, buf->path_name, PATH_MAX);
154 tmp_fd->path_name[PATH_MAX - 1] = '\0';
155
156 /* Opening the tracefile in write mode */
157 if (tmp_fd->path_name != NULL) {
158 ret = open(tmp_fd->path_name,
159 O_WRONLY|O_CREAT|O_TRUNC, S_IRWXU|S_IRWXG|S_IRWXO);
160 if (ret < 0) {
161 ERR("Opening %s", tmp_fd->path_name);
162 perror("open");
163 goto end;
164 }
165 tmp_fd->out_fd = ret;
166 DBG("Adding %s (%d, %d, %d)", tmp_fd->path_name,
167 tmp_fd->sessiond_fd, tmp_fd->consumerd_fd, tmp_fd->out_fd);
168 }
169
170 if (tmp_fd->output == LTTNG_EVENT_MMAP) {
171 /* get the len of the mmap region */
172 ret = kernctl_get_mmap_len(tmp_fd->consumerd_fd, &tmp_fd->mmap_len);
173 if (ret != 0) {
174 ret = errno;
175 perror("kernctl_get_mmap_len");
176 goto end;
177 }
178
179 tmp_fd->mmap_base = mmap(NULL, tmp_fd->mmap_len,
180 PROT_READ, MAP_PRIVATE, tmp_fd->consumerd_fd, 0);
181 if (tmp_fd->mmap_base == MAP_FAILED) {
182 perror("Error mmaping");
183 ret = -1;
184 goto end;
185 }
186 }
187
188 cds_list_add(&tmp_fd->list, &kconsumerd_data.fd_list.head);
189 kconsumerd_data.fds_count++;
190 kconsumerd_data.need_update = 1;
191 end:
192 pthread_mutex_unlock(&kconsumerd_data.lock);
193 return ret;
194 }
195
196 /*
197 * Update a fd according to what we just received.
198 */
199 static void kconsumerd_change_fd_state(int sessiond_fd,
200 enum lttng_kconsumerd_fd_state state)
201 {
202 struct lttng_kconsumerd_fd *iter;
203
204 pthread_mutex_lock(&kconsumerd_data.lock);
205 cds_list_for_each_entry(iter, &kconsumerd_data.fd_list.head, list) {
206 if (iter->sessiond_fd == sessiond_fd) {
207 iter->state = state;
208 break;
209 }
210 }
211 kconsumerd_data.need_update = 1;
212 pthread_mutex_unlock(&kconsumerd_data.lock);
213 }
214
215 /*
216 * Allocate the pollfd structure and the local view of the out fds to avoid
217 * doing a lookup in the linked list and concurrency issues when writing is
218 * needed. Called with kconsumerd_data.lock held.
219 *
220 * Returns the number of fds in the structures.
221 */
222 static int kconsumerd_update_poll_array(
223 struct lttng_kconsumerd_local_data *ctx, struct pollfd **pollfd,
224 struct lttng_kconsumerd_fd **local_kconsumerd_fd)
225 {
226 struct lttng_kconsumerd_fd *iter;
227 int i = 0;
228
229 DBG("Updating poll fd array");
230 cds_list_for_each_entry(iter, &kconsumerd_data.fd_list.head, list) {
231 if (iter->state == ACTIVE_FD) {
232 DBG("Active FD %d", iter->consumerd_fd);
233 (*pollfd)[i].fd = iter->consumerd_fd;
234 (*pollfd)[i].events = POLLIN | POLLPRI;
235 local_kconsumerd_fd[i] = iter;
236 i++;
237 }
238 }
239
240 /*
241 * Insert the kconsumerd_poll_pipe at the end of the array and don't
242 * increment i so nb_fd is the number of real FD.
243 */
244 (*pollfd)[i].fd = ctx->kconsumerd_poll_pipe[0];
245 (*pollfd)[i].events = POLLIN;
246 return i;
247 }
248
249 /*
250 * Receives an array of file descriptors and the associated structures
251 * describing each fd (path name).
252 *
253 * Returns the size of received data
254 */
255 static int kconsumerd_consumerd_recv_fd(
256 struct lttng_kconsumerd_local_data *ctx, int sfd,
257 struct pollfd *kconsumerd_sockpoll, int size,
258 enum lttng_kconsumerd_command cmd_type)
259 {
260 struct iovec iov[1];
261 int ret = 0, i, tmp2;
262 struct cmsghdr *cmsg;
263 int nb_fd;
264 char recv_fd[CMSG_SPACE(sizeof(int))];
265 struct lttcomm_kconsumerd_msg lkm;
266
267 /* the number of fds we are about to receive */
268 nb_fd = size / sizeof(struct lttcomm_kconsumerd_msg);
269
270 /*
271 * nb_fd is the number of fds we receive. One fd per recvmsg.
272 */
273 for (i = 0; i < nb_fd; i++) {
274 struct msghdr msg = { 0 };
275
276 /* Prepare to receive the structures */
277 iov[0].iov_base = &lkm;
278 iov[0].iov_len = sizeof(lkm);
279 msg.msg_iov = iov;
280 msg.msg_iovlen = 1;
281
282 msg.msg_control = recv_fd;
283 msg.msg_controllen = sizeof(recv_fd);
284
285 DBG("Waiting to receive fd");
286 if (lttng_kconsumerd_poll_socket(kconsumerd_sockpoll) < 0) {
287 goto end;
288 }
289
290 if ((ret = recvmsg(sfd, &msg, 0)) < 0) {
291 perror("recvmsg");
292 continue;
293 }
294
295 if (ret != (size / nb_fd)) {
296 ERR("Received only %d, expected %d", ret, size);
297 lttng_kconsumerd_send_error(ctx, KCONSUMERD_ERROR_RECV_FD);
298 goto end;
299 }
300
301 cmsg = CMSG_FIRSTHDR(&msg);
302 if (!cmsg) {
303 ERR("Invalid control message header");
304 ret = -1;
305 lttng_kconsumerd_send_error(ctx, KCONSUMERD_ERROR_RECV_FD);
306 goto end;
307 }
308
309 /* if we received fds */
310 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
311 switch (cmd_type) {
312 case ADD_STREAM:
313 DBG("kconsumerd_add_fd %s (%d)", lkm.path_name,
314 ((int *) CMSG_DATA(cmsg))[0]);
315
316 ret = kconsumerd_add_fd(&lkm, ((int *) CMSG_DATA(cmsg))[0]);
317 if (ret < 0) {
318 lttng_kconsumerd_send_error(ctx, KCONSUMERD_OUTFD_ERROR);
319 goto end;
320 }
321 break;
322 case UPDATE_STREAM:
323 kconsumerd_change_fd_state(lkm.fd, lkm.state);
324 break;
325 default:
326 break;
327 }
328 /* signal the poll thread */
329 tmp2 = write(ctx->kconsumerd_poll_pipe[1], "4", 1);
330 if (tmp2 < 0) {
331 perror("write kconsumerd poll");
332 }
333 } else {
334 ERR("Didn't received any fd");
335 lttng_kconsumerd_send_error(ctx, KCONSUMERD_ERROR_RECV_FD);
336 ret = -1;
337 goto end;
338 }
339 }
340
341 end:
342 return ret;
343 }
344
345 /*
346 * Set the error socket.
347 */
348 void lttng_kconsumerd_set_error_sock(
349 struct lttng_kconsumerd_local_data *ctx, int sock)
350 {
351 ctx->kconsumerd_error_socket = sock;
352 }
353
354 /*
355 * Set the command socket path.
356 */
357
358 void lttng_kconsumerd_set_command_sock_path(
359 struct lttng_kconsumerd_local_data *ctx, char *sock)
360 {
361 ctx->kconsumerd_command_sock_path = sock;
362 }
363
364 /*
365 * Mmap the ring buffer, read it and write the data to the tracefile.
366 *
367 * Returns the number of bytes written
368 */
369 int lttng_kconsumerd_on_read_subbuffer_mmap(
370 struct lttng_kconsumerd_local_data *ctx,
371 struct lttng_kconsumerd_fd *kconsumerd_fd, unsigned long len)
372 {
373 unsigned long mmap_offset;
374 char *padding = NULL;
375 long ret = 0;
376 off_t orig_offset = kconsumerd_fd->out_fd_offset;
377 int fd = kconsumerd_fd->consumerd_fd;
378 int outfd = kconsumerd_fd->out_fd;
379
380 /* get the offset inside the fd to mmap */
381 ret = kernctl_get_mmap_read_offset(fd, &mmap_offset);
382 if (ret != 0) {
383 ret = errno;
384 perror("kernctl_get_mmap_read_offset");
385 goto end;
386 }
387
388 while (len > 0) {
389 ret = write(outfd, kconsumerd_fd->mmap_base + mmap_offset, len);
390 if (ret >= len) {
391 len = 0;
392 } else if (ret < 0) {
393 ret = errno;
394 perror("Error in file write");
395 goto end;
396 }
397 /* This won't block, but will start writeout asynchronously */
398 sync_file_range(outfd, kconsumerd_fd->out_fd_offset, ret,
399 SYNC_FILE_RANGE_WRITE);
400 kconsumerd_fd->out_fd_offset += ret;
401 }
402
403 /*
404 * This does a blocking write-and-wait on any page that belongs to the
405 * subbuffer prior to the one we just wrote.
406 * Don't care about error values, as these are just hints and ways to
407 * limit the amount of page cache used.
408 */
409 if (orig_offset >= kconsumerd_fd->max_sb_size) {
410 sync_file_range(outfd, orig_offset - kconsumerd_fd->max_sb_size,
411 kconsumerd_fd->max_sb_size,
412 SYNC_FILE_RANGE_WAIT_BEFORE
413 | SYNC_FILE_RANGE_WRITE
414 | SYNC_FILE_RANGE_WAIT_AFTER);
415
416 /*
417 * Give hints to the kernel about how we access the file:
418 * POSIX_FADV_DONTNEED : we won't re-access data in a near future after
419 * we write it.
420 *
421 * We need to call fadvise again after the file grows because the
422 * kernel does not seem to apply fadvise to non-existing parts of the
423 * file.
424 *
425 * Call fadvise _after_ having waited for the page writeback to
426 * complete because the dirty page writeback semantic is not well
427 * defined. So it can be expected to lead to lower throughput in
428 * streaming.
429 */
430 posix_fadvise(outfd, orig_offset - kconsumerd_fd->max_sb_size,
431 kconsumerd_fd->max_sb_size, POSIX_FADV_DONTNEED);
432 }
433 goto end;
434
435 end:
436 if (padding != NULL) {
437 free(padding);
438 }
439 return ret;
440 }
441
442 /*
443 * Splice the data from the ring buffer to the tracefile.
444 *
445 * Returns the number of bytes spliced.
446 */
447 int lttng_kconsumerd_on_read_subbuffer_splice(
448 struct lttng_kconsumerd_local_data *ctx,
449 struct lttng_kconsumerd_fd *kconsumerd_fd, unsigned long len)
450 {
451 long ret = 0;
452 loff_t offset = 0;
453 off_t orig_offset = kconsumerd_fd->out_fd_offset;
454 int fd = kconsumerd_fd->consumerd_fd;
455 int outfd = kconsumerd_fd->out_fd;
456
457 while (len > 0) {
458 DBG("splice chan to pipe offset %lu (fd : %d)",
459 (unsigned long)offset, fd);
460 ret = splice(fd, &offset, ctx->kconsumerd_thread_pipe[1], NULL, len,
461 SPLICE_F_MOVE | SPLICE_F_MORE);
462 DBG("splice chan to pipe ret %ld", ret);
463 if (ret < 0) {
464 ret = errno;
465 perror("Error in relay splice");
466 goto splice_error;
467 }
468
469 ret = splice(ctx->kconsumerd_thread_pipe[0], NULL, outfd, NULL, ret,
470 SPLICE_F_MOVE | SPLICE_F_MORE);
471 DBG("splice pipe to file %ld", ret);
472 if (ret < 0) {
473 ret = errno;
474 perror("Error in file splice");
475 goto splice_error;
476 }
477 len -= ret;
478 /* This won't block, but will start writeout asynchronously */
479 sync_file_range(outfd, kconsumerd_fd->out_fd_offset, ret,
480 SYNC_FILE_RANGE_WRITE);
481 kconsumerd_fd->out_fd_offset += ret;
482 }
483
484 /*
485 * This does a blocking write-and-wait on any page that belongs to the
486 * subbuffer prior to the one we just wrote.
487 * Don't care about error values, as these are just hints and ways to
488 * limit the amount of page cache used.
489 */
490 if (orig_offset >= kconsumerd_fd->max_sb_size) {
491 sync_file_range(outfd, orig_offset - kconsumerd_fd->max_sb_size,
492 kconsumerd_fd->max_sb_size,
493 SYNC_FILE_RANGE_WAIT_BEFORE
494 | SYNC_FILE_RANGE_WRITE
495 | SYNC_FILE_RANGE_WAIT_AFTER);
496 /*
497 * Give hints to the kernel about how we access the file:
498 * POSIX_FADV_DONTNEED : we won't re-access data in a near future after
499 * we write it.
500 *
501 * We need to call fadvise again after the file grows because the
502 * kernel does not seem to apply fadvise to non-existing parts of the
503 * file.
504 *
505 * Call fadvise _after_ having waited for the page writeback to
506 * complete because the dirty page writeback semantic is not well
507 * defined. So it can be expected to lead to lower throughput in
508 * streaming.
509 */
510 posix_fadvise(outfd, orig_offset - kconsumerd_fd->max_sb_size,
511 kconsumerd_fd->max_sb_size, POSIX_FADV_DONTNEED);
512 }
513 goto end;
514
515 splice_error:
516 /* send the appropriate error description to sessiond */
517 switch(ret) {
518 case EBADF:
519 lttng_kconsumerd_send_error(ctx, KCONSUMERD_SPLICE_EBADF);
520 break;
521 case EINVAL:
522 lttng_kconsumerd_send_error(ctx, KCONSUMERD_SPLICE_EINVAL);
523 break;
524 case ENOMEM:
525 lttng_kconsumerd_send_error(ctx, KCONSUMERD_SPLICE_ENOMEM);
526 break;
527 case ESPIPE:
528 lttng_kconsumerd_send_error(ctx, KCONSUMERD_SPLICE_ESPIPE);
529 break;
530 }
531
532 end:
533 return ret;
534 }
535
/*
 * Block in poll() on the two entries of kconsumerd_sockpoll: index 0 is
 * checked as the should_quit pipe, index 1 is expected to be the command
 * socket.
 *
 * Returns -1 when poll() fails or when the should_quit pipe reports exactly
 * POLLIN (the caller should exit); returns 0 otherwise, meaning the command
 * socket can be serviced.
 */
int lttng_kconsumerd_poll_socket(struct pollfd *kconsumerd_sockpoll)
{
	int nb_ready = poll(kconsumerd_sockpoll, 2, -1);

	if (nb_ready == -1) {
		perror("Poll error");
		return -1;
	}

	if (kconsumerd_sockpoll[0].revents == POLLIN) {
		DBG("kconsumerd_should_quit wake up");
		return -1;
	}

	return 0;
}
558
559 /*
560 * This thread polls the fds in the ltt_fd_list to consume the data and write
561 * it to tracefile if necessary.
562 */
563 void *lttng_kconsumerd_thread_poll_fds(void *data)
564 {
565 int num_rdy, num_hup, high_prio, ret, i;
566 struct pollfd *pollfd = NULL;
567 /* local view of the fds */
568 struct lttng_kconsumerd_fd **local_kconsumerd_fd = NULL;
569 /* local view of kconsumerd_data.fds_count */
570 int nb_fd = 0;
571 char tmp;
572 int tmp2;
573 struct lttng_kconsumerd_local_data *ctx = data;
574
575
576 local_kconsumerd_fd = malloc(sizeof(struct lttng_kconsumerd_fd));
577
578 while (1) {
579 high_prio = 0;
580 num_hup = 0;
581
582 /*
583 * the ltt_fd_list has been updated, we need to update our
584 * local array as well
585 */
586 pthread_mutex_lock(&kconsumerd_data.lock);
587 if (kconsumerd_data.need_update) {
588 if (pollfd != NULL) {
589 free(pollfd);
590 pollfd = NULL;
591 }
592 if (local_kconsumerd_fd != NULL) {
593 free(local_kconsumerd_fd);
594 local_kconsumerd_fd = NULL;
595 }
596
597 /* allocate for all fds + 1 for the kconsumerd_poll_pipe */
598 pollfd = malloc((kconsumerd_data.fds_count + 1) * sizeof(struct pollfd));
599 if (pollfd == NULL) {
600 perror("pollfd malloc");
601 pthread_mutex_unlock(&kconsumerd_data.lock);
602 goto end;
603 }
604
605 /* allocate for all fds + 1 for the kconsumerd_poll_pipe */
606 local_kconsumerd_fd = malloc((kconsumerd_data.fds_count + 1) *
607 sizeof(struct lttng_kconsumerd_fd));
608 if (local_kconsumerd_fd == NULL) {
609 perror("local_kconsumerd_fd malloc");
610 pthread_mutex_unlock(&kconsumerd_data.lock);
611 goto end;
612 }
613 ret = kconsumerd_update_poll_array(ctx, &pollfd, local_kconsumerd_fd);
614 if (ret < 0) {
615 ERR("Error in allocating pollfd or local_outfds");
616 lttng_kconsumerd_send_error(ctx, KCONSUMERD_POLL_ERROR);
617 pthread_mutex_unlock(&kconsumerd_data.lock);
618 goto end;
619 }
620 nb_fd = ret;
621 kconsumerd_data.need_update = 0;
622 }
623 pthread_mutex_unlock(&kconsumerd_data.lock);
624
625 /* poll on the array of fds */
626 DBG("polling on %d fd", nb_fd + 1);
627 num_rdy = poll(pollfd, nb_fd + 1, kconsumerd_poll_timeout);
628 DBG("poll num_rdy : %d", num_rdy);
629 if (num_rdy == -1) {
630 perror("Poll error");
631 lttng_kconsumerd_send_error(ctx, KCONSUMERD_POLL_ERROR);
632 goto end;
633 } else if (num_rdy == 0) {
634 DBG("Polling thread timed out");
635 goto end;
636 }
637
638 /* No FDs and kconsumerd_quit, kconsumerd_cleanup the thread */
639 if (nb_fd == 0 && kconsumerd_quit == 1) {
640 goto end;
641 }
642
643 /*
644 * If the kconsumerd_poll_pipe triggered poll go
645 * directly to the beginning of the loop to update the
646 * array. We want to prioritize array update over
647 * low-priority reads.
648 */
649 if (pollfd[nb_fd].revents == POLLIN) {
650 DBG("kconsumerd_poll_pipe wake up");
651 tmp2 = read(ctx->kconsumerd_poll_pipe[0], &tmp, 1);
652 if (tmp2 < 0) {
653 perror("read kconsumerd poll");
654 }
655 continue;
656 }
657
658 /* Take care of high priority channels first. */
659 for (i = 0; i < nb_fd; i++) {
660 switch(pollfd[i].revents) {
661 case POLLERR:
662 ERR("Error returned in polling fd %d.", pollfd[i].fd);
663 kconsumerd_del_fd(local_kconsumerd_fd[i]);
664 num_hup++;
665 break;
666 case POLLHUP:
667 DBG("Polling fd %d tells it has hung up.", pollfd[i].fd);
668 kconsumerd_del_fd(local_kconsumerd_fd[i]);
669 num_hup++;
670 break;
671 case POLLNVAL:
672 ERR("Polling fd %d tells fd is not open.", pollfd[i].fd);
673 kconsumerd_del_fd(local_kconsumerd_fd[i]);
674 num_hup++;
675 break;
676 case POLLPRI:
677 DBG("Urgent read on fd %d", pollfd[i].fd);
678 high_prio = 1;
679 ret = ctx->on_buffer_ready(local_kconsumerd_fd[i]);
680 /* it's ok to have an unavailable sub-buffer */
681 if (ret == EAGAIN) {
682 ret = 0;
683 }
684 break;
685 }
686 }
687
688 /* If every buffer FD has hung up, we end the read loop here */
689 if (nb_fd > 0 && num_hup == nb_fd) {
690 DBG("every buffer FD has hung up\n");
691 if (kconsumerd_quit == 1) {
692 goto end;
693 }
694 continue;
695 }
696
697 /* Take care of low priority channels. */
698 if (high_prio == 0) {
699 for (i = 0; i < nb_fd; i++) {
700 if (pollfd[i].revents == POLLIN) {
701 DBG("Normal read on fd %d", pollfd[i].fd);
702 ret = ctx->on_buffer_ready(local_kconsumerd_fd[i]);
703 /* it's ok to have an unavailable subbuffer */
704 if (ret == EAGAIN) {
705 ret = 0;
706 }
707 }
708 }
709 }
710 }
711 end:
712 DBG("polling thread exiting");
713 if (pollfd != NULL) {
714 free(pollfd);
715 pollfd = NULL;
716 }
717 if (local_kconsumerd_fd != NULL) {
718 free(local_kconsumerd_fd);
719 local_kconsumerd_fd = NULL;
720 }
721 return NULL;
722 }
723
724 /*
725 * Initialise the necessary environnement :
726 * - create a new context
727 * - create the poll_pipe
728 * - create the should_quit pipe (for signal handler)
729 * - create the thread pipe (for splice)
730 *
731 * Takes a function pointer as argument, this function is called when data is
732 * available on a buffer. This function is responsible to do the
733 * kernctl_get_next_subbuf, read the data with mmap or splice depending on the
734 * buffer configuration and then kernctl_put_next_subbuf at the end.
735 *
736 * Returns a pointer to the new context or NULL on error.
737 */
738 struct lttng_kconsumerd_local_data *lttng_kconsumerd_create(
739 int (*buffer_ready)(struct lttng_kconsumerd_fd *kconsumerd_fd))
740 {
741 int ret;
742 struct lttng_kconsumerd_local_data *ctx;
743
744 ctx = malloc(sizeof(struct lttng_kconsumerd_local_data));
745 if (ctx == NULL) {
746 perror("allocating context");
747 goto end;
748 }
749
750 ctx->on_buffer_ready = buffer_ready;
751
752 ret = pipe(ctx->kconsumerd_poll_pipe);
753 if (ret < 0) {
754 perror("Error creating poll pipe");
755 ctx = NULL;
756 goto end;
757 }
758
759 ret = pipe(ctx->kconsumerd_should_quit);
760 if (ret < 0) {
761 perror("Error creating recv pipe");
762 ctx = NULL;
763 goto end;
764 }
765
766 ret = pipe(ctx->kconsumerd_thread_pipe);
767 if (ret < 0) {
768 perror("Error creating thread pipe");
769 ctx = NULL;
770 goto end;
771 }
772
773 end:
774 return ctx;
775 }
776
777 /*
778 * Close all fds associated with the instance and free the context.
779 */
780 void lttng_kconsumerd_destroy(struct lttng_kconsumerd_local_data *ctx)
781 {
782 close(ctx->kconsumerd_error_socket);
783 close(ctx->kconsumerd_thread_pipe[0]);
784 close(ctx->kconsumerd_thread_pipe[1]);
785 close(ctx->kconsumerd_poll_pipe[0]);
786 close(ctx->kconsumerd_poll_pipe[1]);
787 close(ctx->kconsumerd_should_quit[0]);
788 close(ctx->kconsumerd_should_quit[1]);
789 unlink(ctx->kconsumerd_command_sock_path);
790 free(ctx);
791 ctx = NULL;
792 }
793
794 /*
795 * This thread listens on the consumerd socket and receives the file
796 * descriptors from the session daemon.
797 */
798 void *lttng_kconsumerd_thread_receive_fds(void *data)
799 {
800 int sock, client_socket, ret;
801 struct lttcomm_kconsumerd_header tmp;
802 /*
803 * structure to poll for incoming data on communication socket avoids
804 * making blocking sockets.
805 */
806 struct pollfd kconsumerd_sockpoll[2];
807 struct lttng_kconsumerd_local_data *ctx = data;
808
809
810 DBG("Creating command socket %s", ctx->kconsumerd_command_sock_path);
811 unlink(ctx->kconsumerd_command_sock_path);
812 client_socket = lttcomm_create_unix_sock(ctx->kconsumerd_command_sock_path);
813 if (client_socket < 0) {
814 ERR("Cannot create command socket");
815 goto end;
816 }
817
818 ret = lttcomm_listen_unix_sock(client_socket);
819 if (ret < 0) {
820 goto end;
821 }
822
823 DBG("Sending ready command to ltt-sessiond");
824 ret = lttng_kconsumerd_send_error(ctx, KCONSUMERD_COMMAND_SOCK_READY);
825 if (ret < 0) {
826 ERR("Error sending ready command to ltt-sessiond");
827 goto end;
828 }
829
830 ret = fcntl(client_socket, F_SETFL, O_NONBLOCK);
831 if (ret < 0) {
832 perror("fcntl O_NONBLOCK");
833 goto end;
834 }
835
836 /* prepare the FDs to poll : to client socket and the should_quit pipe */
837 kconsumerd_sockpoll[0].fd = ctx->kconsumerd_should_quit[0];
838 kconsumerd_sockpoll[0].events = POLLIN | POLLPRI;
839 kconsumerd_sockpoll[1].fd = client_socket;
840 kconsumerd_sockpoll[1].events = POLLIN | POLLPRI;
841
842 if (lttng_kconsumerd_poll_socket(kconsumerd_sockpoll) < 0) {
843 goto end;
844 }
845 DBG("Connection on client_socket");
846
847 /* Blocking call, waiting for transmission */
848 sock = lttcomm_accept_unix_sock(client_socket);
849 if (sock <= 0) {
850 WARN("On accept");
851 goto end;
852 }
853 ret = fcntl(sock, F_SETFL, O_NONBLOCK);
854 if (ret < 0) {
855 perror("fcntl O_NONBLOCK");
856 goto end;
857 }
858
859 /* update the polling structure to poll on the established socket */
860 kconsumerd_sockpoll[1].fd = sock;
861 kconsumerd_sockpoll[1].events = POLLIN | POLLPRI;
862
863 while (1) {
864 if (lttng_kconsumerd_poll_socket(kconsumerd_sockpoll) < 0) {
865 goto end;
866 }
867 DBG("Incoming fds on sock");
868
869 /* We first get the number of fd we are about to receive */
870 ret = lttcomm_recv_unix_sock(sock, &tmp,
871 sizeof(struct lttcomm_kconsumerd_header));
872 if (ret <= 0) {
873 ERR("Communication interrupted on command socket");
874 goto end;
875 }
876 if (tmp.cmd_type == STOP) {
877 DBG("Received STOP command");
878 goto end;
879 }
880 if (kconsumerd_quit) {
881 DBG("kconsumerd_thread_receive_fds received quit from signal");
882 goto end;
883 }
884
885 /* we received a command to add or update fds */
886 ret = kconsumerd_consumerd_recv_fd(ctx, sock, kconsumerd_sockpoll,
887 tmp.payload_size, tmp.cmd_type);
888 if (ret < 0) {
889 ERR("Receiving the FD, exiting");
890 goto end;
891 }
892 DBG("received fds on sock");
893 }
894
895 end:
896 DBG("kconsumerd_thread_receive_fds exiting");
897
898 /*
899 * when all fds have hung up, the polling thread
900 * can exit cleanly
901 */
902 kconsumerd_quit = 1;
903
904 /*
905 * 2s of grace period, if no polling events occur during
906 * this period, the polling thread will exit even if there
907 * are still open FDs (should not happen, but safety mechanism).
908 */
909 kconsumerd_poll_timeout = LTTNG_KCONSUMERD_POLL_GRACE_PERIOD;
910
911 /* wake up the polling thread */
912 ret = write(ctx->kconsumerd_poll_pipe[1], "4", 1);
913 if (ret < 0) {
914 perror("poll pipe write");
915 }
916 return NULL;
917 }
918
919 /*
920 * Close all the tracefiles and stream fds, should be called when all instances
921 * are destroyed.
922 */
923 void lttng_kconsumerd_cleanup(void)
924 {
925 struct lttng_kconsumerd_fd *iter, *tmp;
926
927 /*
928 * close all outfd. Called when there are no more threads
929 * running (after joining on the threads), no need to protect
930 * list iteration with mutex.
931 */
932 cds_list_for_each_entry_safe(iter, tmp,
933 &kconsumerd_data.fd_list.head, list) {
934 kconsumerd_del_fd(iter);
935 }
936 }
937
938 /*
939 * Called from signal handler.
940 */
941 void lttng_kconsumerd_should_exit(struct lttng_kconsumerd_local_data *ctx)
942 {
943 int ret;
944 kconsumerd_quit = 1;
945 ret = write(ctx->kconsumerd_should_quit[1], "4", 1);
946 if (ret < 0) {
947 perror("write kconsumerd quit");
948 }
949 }
950
951 /*
952 * Send return code to the session daemon.
953 */
954 int lttng_kconsumerd_send_error(
955 struct lttng_kconsumerd_local_data *ctx, int cmd)
956 {
957 if (ctx->kconsumerd_error_socket > 0) {
958 return lttcomm_send_unix_sock(ctx->kconsumerd_error_socket, &cmd,
959 sizeof(enum lttcomm_sessiond_command));
960 }
961
962 return 0;
963 }
This page took 0.0802659999999999 seconds and 4 git commands to generate.