Fix: waiter: futex wait: handle spurious futex wakeups
[lttng-tools.git] / src / common / compat / poll.cpp
CommitLineData
5eb91c98 1/*
21cf9b6b 2 * Copyright (C) 2011 EfficiOS Inc.
ab5be9fa 3 * Copyright (C) 2019 Yannick Lamarre <ylamarre@efficios.com>
5eb91c98 4 *
c922647d 5 * SPDX-License-Identifier: LGPL-2.1-only
5eb91c98 6 *
5eb91c98
DG
7 */
8
6c1c0768 9#define _LGPL_SOURCE
5eb91c98 10#include <stdlib.h>
f057dfc3 11#include <stdbool.h>
5eb91c98 12
c9e313bc
SM
13#include <common/defaults.hpp>
14#include <common/error.hpp>
15#include <common/macros.hpp>
16#include <common/utils.hpp>
5eb91c98 17
c9e313bc 18#include "poll.hpp"
5eb91c98 19
ce7fc42f 20#ifdef HAVE_EPOLL
0060607b
MJ
21
22#include <fcntl.h>
23#include <limits.h>
24#include <sys/types.h>
25#include <sys/stat.h>
26#include <unistd.h>
27
28/*
29 * Maximum number of fd we can monitor.
30 *
31 * For epoll(7), /proc/sys/fs/epoll/max_user_watches (since Linux 2.6.28) will
32 * be used for the maximum size of the poll set. If this interface is not
33 * available, according to the manpage, the max_user_watches value is 1/25 (4%)
34 * of the available low memory divided by the registration cost in bytes which
35 * is 90 bytes on a 32-bit kernel and 160 bytes on a 64-bit kernel.
36 *
37 */
38static unsigned int poll_max_size;
39
40/*
41 * Resize the epoll events structure of the new size.
42 *
43 * Return 0 on success or else -1 with the current events pointer untouched.
44 */
45static int resize_poll_event(struct lttng_poll_event *events,
46 uint32_t new_size)
47{
48 struct epoll_event *ptr;
49
a0377dfe 50 LTTNG_ASSERT(events);
0060607b 51
740da7d5 52 ptr = (epoll_event *) realloc(events->events, new_size * sizeof(*ptr));
0060607b
MJ
53 if (ptr == NULL) {
54 PERROR("realloc epoll add");
55 goto error;
56 }
57 if (new_size > events->alloc_size) {
58 /* Zero newly allocated memory */
59 memset(ptr + events->alloc_size, 0,
60 (new_size - events->alloc_size) * sizeof(*ptr));
61 }
62 events->events = ptr;
63 events->alloc_size = new_size;
64
65 return 0;
66
67error:
68 return -1;
69}
70
71/*
72 * Create epoll set and allocate returned events structure.
73 */
64803277 74int compat_epoll_create(struct lttng_poll_event *events, int count, int flags)
0060607b
MJ
75{
76 int ret;
77
64803277 78 if (events == NULL || count <= 0) {
0060607b
MJ
79 goto error;
80 }
81
82 if (!poll_max_size) {
83 if (lttng_poll_set_max_size()) {
84 goto error;
85 }
86 }
87
88 /* Don't bust the limit here */
64803277
SM
89 if (count > poll_max_size) {
90 count = poll_max_size;
0060607b
MJ
91 }
92
64803277 93 ret = compat_glibc_epoll_create(count, flags);
0060607b
MJ
94 if (ret < 0) {
95 /* At this point, every error is fatal */
96 PERROR("epoll_create1");
97 goto error;
98 }
99
100 events->epfd = ret;
101
102 /* This *must* be freed by using lttng_poll_free() */
64803277 103 events->events = calloc<epoll_event>(count);
0060607b
MJ
104 if (events->events == NULL) {
105 PERROR("zmalloc epoll set");
106 goto error_close;
107 }
108
64803277 109 events->alloc_size = events->init_size = count;
0060607b
MJ
110 events->nb_fd = 0;
111
112 return 0;
113
114error_close:
115 ret = close(events->epfd);
116 if (ret) {
117 PERROR("close");
118 }
119error:
120 return -1;
121}
122
123/*
124 * Add a fd to the epoll set with requesting events.
125 */
0060607b
MJ
126int compat_epoll_add(struct lttng_poll_event *events, int fd, uint32_t req_events)
127{
128 int ret;
129 struct epoll_event ev;
130
131 if (events == NULL || events->events == NULL || fd < 0) {
132 ERR("Bad compat epoll add arguments");
133 goto error;
134 }
135
136 /*
137 * Zero struct epoll_event to ensure all representations of its
138 * union are zeroed.
139 */
140 memset(&ev, 0, sizeof(ev));
141 ev.events = req_events;
142 ev.data.fd = fd;
143
144 ret = epoll_ctl(events->epfd, EPOLL_CTL_ADD, fd, &ev);
145 if (ret < 0) {
146 switch (errno) {
147 case EEXIST:
148 /* If exist, it's OK. */
149 goto end;
150 case ENOSPC:
151 case EPERM:
152 /* Print PERROR and goto end not failing. Show must go on. */
153 PERROR("epoll_ctl ADD");
154 goto end;
155 default:
156 PERROR("epoll_ctl ADD fatal");
157 goto error;
158 }
159 }
160
161 events->nb_fd++;
162
163end:
164 return 0;
165
166error:
167 return -1;
168}
169
170/*
171 * Remove a fd from the epoll set.
172 */
0060607b
MJ
173int compat_epoll_del(struct lttng_poll_event *events, int fd)
174{
175 int ret;
176
177 if (events == NULL || fd < 0 || events->nb_fd == 0) {
178 goto error;
179 }
180
181 ret = epoll_ctl(events->epfd, EPOLL_CTL_DEL, fd, NULL);
182 if (ret < 0) {
183 switch (errno) {
184 case ENOENT:
185 case EPERM:
186 /* Print PERROR and goto end not failing. Show must go on. */
187 PERROR("epoll_ctl DEL");
188 goto end;
189 default:
190 PERROR("epoll_ctl DEL fatal");
191 goto error;
192 }
193 }
194
195 events->nb_fd--;
196
197end:
198 return 0;
199
200error:
201 return -1;
202}
203
204/*
205 * Set an fd's events.
206 */
0060607b
MJ
207int compat_epoll_mod(struct lttng_poll_event *events, int fd, uint32_t req_events)
208{
209 int ret;
210 struct epoll_event ev;
211
212 if (events == NULL || fd < 0 || events->nb_fd == 0) {
213 goto error;
214 }
215
216 /*
217 * Zero struct epoll_event to ensure all representations of its
218 * union are zeroed.
219 */
220 memset(&ev, 0, sizeof(ev));
221 ev.events = req_events;
222 ev.data.fd = fd;
223
224 ret = epoll_ctl(events->epfd, EPOLL_CTL_MOD, fd, &ev);
225 if (ret < 0) {
226 switch (errno) {
227 case ENOENT:
228 case EPERM:
229 /* Print PERROR and goto end not failing. Show must go on. */
230 PERROR("epoll_ctl MOD");
231 goto end;
232 default:
233 PERROR("epoll_ctl MOD fatal");
234 goto error;
235 }
236 }
237
238end:
239 return 0;
240
241error:
242 return -1;
243}
244
245/*
246 * Wait on epoll set. This is a blocking call of timeout value.
247 */
0060607b
MJ
248int compat_epoll_wait(struct lttng_poll_event *events, int timeout,
249 bool interruptible)
250{
251 int ret;
252 uint32_t new_size;
253
254 if (events == NULL || events->events == NULL) {
255 ERR("Wrong arguments in compat_epoll_wait");
256 goto error;
257 }
258
259 if (events->nb_fd == 0) {
260 errno = EINVAL;
261 return -1;
262 }
263
264 /*
265 * Resize if needed before waiting. We could either expand the array or
266 * shrink it down. It's important to note that after this step, we are
267 * ensured that the events argument of the epoll_wait call will be large
268 * enough to hold every possible returned events.
269 */
270 new_size = 1U << utils_get_count_order_u32(events->nb_fd);
271 if (new_size != events->alloc_size && new_size >= events->init_size) {
272 ret = resize_poll_event(events, new_size);
273 if (ret < 0) {
274 /* ENOMEM problem at this point. */
275 goto error;
276 }
277 }
278
279 do {
280 ret = epoll_wait(events->epfd, events->events, events->nb_fd, timeout);
281 } while (!interruptible && ret == -1 && errno == EINTR);
282 if (ret < 0) {
283 if (errno != EINTR) {
284 PERROR("epoll_wait");
285 }
286 goto error;
287 }
288
289 /*
290 * Since the returned events are set sequentially in the "events" structure
291 * we only need to return the epoll_wait value and iterate over it.
292 */
293 return ret;
294
295error:
296 return -1;
297}
298
299/*
300 * Setup poll set maximum size.
301 */
0060607b
MJ
302int compat_epoll_set_max_size(void)
303{
304 int ret, fd, retval = 0;
305 ssize_t size_ret;
306 char buf[64];
307
308 fd = open(COMPAT_EPOLL_PROC_PATH, O_RDONLY);
309 if (fd < 0) {
310 /*
311 * Failing on opening [1] is not an error per see. [1] was
312 * introduced in Linux 2.6.28 but epoll is available since
313 * 2.5.44. Hence, goto end and set a default value without
314 * setting an error return value.
315 *
316 * [1] /proc/sys/fs/epoll/max_user_watches
317 */
318 retval = 0;
319 goto end;
320 }
321
322 size_ret = lttng_read(fd, buf, sizeof(buf));
323 /*
324 * Allow reading a file smaller than buf, but keep space for
325 * final \0.
326 */
327 if (size_ret < 0 || size_ret >= sizeof(buf)) {
328 PERROR("read set max size");
329 retval = -1;
330 goto end_read;
331 }
332 buf[size_ret] = '\0';
333 poll_max_size = atoi(buf);
334end_read:
335 ret = close(fd);
336 if (ret) {
337 PERROR("close");
338 }
339end:
340 if (!poll_max_size) {
341 poll_max_size = DEFAULT_POLL_SIZE;
342 }
343 DBG("epoll set max size is %d", poll_max_size);
344 return retval;
345}
346
347#else /* HAVE_EPOLL */
348
349#include <sys/resource.h>
350#include <sys/time.h>
cc0acbd1
JG
351
352/*
353 * Maximum number of fd we can monitor.
354 *
355 * For poll(2), the max fds must not exceed RLIMIT_NOFILE given by
356 * getrlimit(2).
357 */
358static unsigned int poll_max_size;
5eb91c98 359
d21b0d71
DG
360/*
361 * Resize the epoll events structure of the new size.
362 *
363 * Return 0 on success or else -1 with the current events pointer untouched.
364 */
365static int resize_poll_event(struct compat_poll_event_array *array,
366 uint32_t new_size)
367{
368 struct pollfd *ptr;
369
a0377dfe 370 LTTNG_ASSERT(array);
d21b0d71 371
ac018a8b
DG
372 /* Refuse to resize the array more than the max size. */
373 if (new_size > poll_max_size) {
374 goto error;
375 }
376
9730eb85 377 ptr = (struct pollfd *) realloc(array->events, new_size * sizeof(*ptr));
d21b0d71
DG
378 if (ptr == NULL) {
379 PERROR("realloc epoll add");
380 goto error;
381 }
53efb85a
MD
382 if (new_size > array->alloc_size) {
383 /* Zero newly allocated memory */
384 memset(ptr + array->alloc_size, 0,
385 (new_size - array->alloc_size) * sizeof(*ptr));
386 }
d21b0d71
DG
387 array->events = ptr;
388 array->alloc_size = new_size;
389
390 return 0;
391
392error:
393 return -1;
394}
395
396/*
397 * Update events with the current events object.
398 */
399static int update_current_events(struct lttng_poll_event *events)
400{
401 int ret;
402 struct compat_poll_event_array *current, *wait;
403
a0377dfe 404 LTTNG_ASSERT(events);
d21b0d71
DG
405
406 current = &events->current;
407 wait = &events->wait;
408
409 wait->nb_fd = current->nb_fd;
dbe23f45 410 if (current->alloc_size != wait->alloc_size) {
d21b0d71
DG
411 ret = resize_poll_event(wait, current->alloc_size);
412 if (ret < 0) {
413 goto error;
414 }
415 }
416 memcpy(wait->events, current->events,
417 current->nb_fd * sizeof(*current->events));
418
dbe23f45 419 /* Update is done. */
d21b0d71 420 events->need_update = 0;
d21b0d71
DG
421
422 return 0;
423
424error:
425 return -1;
426}
427
5eb91c98
DG
428/*
429 * Create pollfd data structure.
430 */
431int compat_poll_create(struct lttng_poll_event *events, int size)
432{
d21b0d71
DG
433 struct compat_poll_event_array *current, *wait;
434
5eb91c98
DG
435 if (events == NULL || size <= 0) {
436 ERR("Wrong arguments for poll create");
437 goto error;
438 }
439
dbe23f45 440 if (!poll_max_size) {
c607fe03
MJ
441 if (lttng_poll_set_max_size()) {
442 goto error;
443 }
dbe23f45
MD
444 }
445
5eb91c98
DG
446 /* Don't bust the limit here */
447 if (size > poll_max_size) {
448 size = poll_max_size;
449 }
450
d21b0d71
DG
451 /* Reset everything before begining the allocation. */
452 memset(events, 0, sizeof(struct lttng_poll_event));
453
d21b0d71
DG
454 current = &events->current;
455 wait = &events->wait;
456
5eb91c98 457 /* This *must* be freed by using lttng_poll_free() */
c56f67e5 458 wait->events = calloc<struct pollfd>(size);
d21b0d71 459 if (wait->events == NULL) {
c56f67e5 460 PERROR("Failed to allocate wait events array during poll initialization");
5eb91c98
DG
461 goto error;
462 }
463
d21b0d71
DG
464 wait->alloc_size = wait->init_size = size;
465
c56f67e5 466 current->events = calloc<struct pollfd>(size);
d21b0d71 467 if (current->events == NULL) {
c56f67e5 468 PERROR("Failed to allocate current events array during poll initialization");
d21b0d71
DG
469 goto error;
470 }
471
472 current->alloc_size = current->init_size = size;
5eb91c98
DG
473
474 return 0;
475
476error:
477 return -1;
478}
479
480/*
481 * Add fd to pollfd data structure with requested events.
482 */
483int compat_poll_add(struct lttng_poll_event *events, int fd,
484 uint32_t req_events)
485{
d21b0d71
DG
486 int new_size, ret, i;
487 struct compat_poll_event_array *current;
5eb91c98 488
d21b0d71 489 if (events == NULL || events->current.events == NULL || fd < 0) {
5eb91c98
DG
490 ERR("Bad compat poll add arguments");
491 goto error;
492 }
493
d21b0d71
DG
494 current = &events->current;
495
496 /* Check if fd we are trying to add is already there. */
497 for (i = 0; i < current->nb_fd; i++) {
d21b0d71
DG
498 if (current->events[i].fd == fd) {
499 errno = EEXIST;
5eb91c98
DG
500 goto error;
501 }
5eb91c98
DG
502 }
503
dbe23f45
MD
504 /* Resize array if needed. */
505 new_size = 1U << utils_get_count_order_u32(current->nb_fd + 1);
506 if (new_size != current->alloc_size && new_size >= current->init_size) {
d21b0d71
DG
507 ret = resize_poll_event(current, new_size);
508 if (ret < 0) {
509 goto error;
510 }
d21b0d71 511 }
5eb91c98 512
d21b0d71
DG
513 current->events[current->nb_fd].fd = fd;
514 current->events[current->nb_fd].events = req_events;
515 current->nb_fd++;
516 events->need_update = 1;
517
518 DBG("fd %d of %d added to pollfd", fd, current->nb_fd);
5eb91c98
DG
519
520 return 0;
521
522error:
523 return -1;
524}
525
f057dfc3
JG
526/*
527 * Modify an fd's events..
528 */
529int compat_poll_mod(struct lttng_poll_event *events, int fd,
530 uint32_t req_events)
531{
8a282751 532 int i;
f057dfc3
JG
533 struct compat_poll_event_array *current;
534
a1de8fcc
YL
535 if (events == NULL || events->current.nb_fd == 0 ||
536 events->current.events == NULL || fd < 0) {
f057dfc3
JG
537 ERR("Bad compat poll mod arguments");
538 goto error;
539 }
540
541 current = &events->current;
542
543 for (i = 0; i < current->nb_fd; i++) {
544 if (current->events[i].fd == fd) {
f057dfc3
JG
545 current->events[i].events = req_events;
546 events->need_update = 1;
547 break;
548 }
549 }
550
a1de8fcc
YL
551 /*
552 * The epoll flavor doesn't flag modifying a non-included FD as an
553 * error.
554 */
f057dfc3
JG
555
556 return 0;
557
558error:
559 return -1;
560}
561
5eb91c98
DG
562/*
563 * Remove a fd from the pollfd structure.
564 */
565int compat_poll_del(struct lttng_poll_event *events, int fd)
566{
a1de8fcc
YL
567 int i, count = 0, ret;
568 uint32_t new_size;
d21b0d71 569 struct compat_poll_event_array *current;
5eb91c98 570
a1de8fcc
YL
571 if (events == NULL || events->current.nb_fd == 0 ||
572 events->current.events == NULL || fd < 0) {
5eb91c98
DG
573 goto error;
574 }
575
d21b0d71
DG
576 /* Ease our life a bit. */
577 current = &events->current;
5eb91c98 578
d21b0d71 579 for (i = 0; i < current->nb_fd; i++) {
5eb91c98 580 /* Don't put back the fd we want to delete */
d21b0d71
DG
581 if (current->events[i].fd != fd) {
582 current->events[count].fd = current->events[i].fd;
583 current->events[count].events = current->events[i].events;
5eb91c98
DG
584 count++;
585 }
586 }
a1de8fcc
YL
587
588 /* The fd was not in our set, return no error as with epoll. */
589 if (current->nb_fd == count) {
590 goto end;
591 }
592
dbe23f45 593 /* No fd duplicate should be ever added into array. */
a0377dfe 594 LTTNG_ASSERT(current->nb_fd - 1 == count);
dbe23f45
MD
595 current->nb_fd = count;
596
597 /* Resize array if needed. */
598 new_size = 1U << utils_get_count_order_u32(current->nb_fd);
a1de8fcc
YL
599 if (new_size != current->alloc_size && new_size >= current->init_size
600 && current->nb_fd != 0) {
dbe23f45
MD
601 ret = resize_poll_event(current, new_size);
602 if (ret < 0) {
603 goto error;
604 }
605 }
5eb91c98 606
d21b0d71 607 events->need_update = 1;
5eb91c98 608
a1de8fcc 609end:
5eb91c98
DG
610 return 0;
611
612error:
613 return -1;
614}
615
616/*
617 * Wait on poll() with timeout. Blocking call.
618 */
9f32e9bf
MD
619int compat_poll_wait(struct lttng_poll_event *events, int timeout,
620 bool interruptible)
5eb91c98 621{
22a73671 622 int ret, active_fd_count;
3e75a9b7 623 size_t pos = 0, consecutive_entries = 0, non_idle_pos;
5eb91c98 624
d21b0d71 625 if (events == NULL || events->current.events == NULL) {
5eb91c98
DG
626 ERR("poll wait arguments error");
627 goto error;
628 }
629
d21b0d71
DG
630 if (events->current.nb_fd == 0) {
631 /* Return an invalid error to be consistent with epoll. */
632 errno = EINVAL;
dbe23f45 633 events->wait.nb_fd = 0;
d21b0d71
DG
634 goto error;
635 }
636
637 if (events->need_update) {
638 ret = update_current_events(events);
639 if (ret < 0) {
640 errno = ENOMEM;
641 goto error;
642 }
643 }
644
a9b0dbc2
JG
645 do {
646 ret = poll(events->wait.events, events->wait.nb_fd, timeout);
9f32e9bf 647 } while (!interruptible && ret == -1 && errno == EINTR);
5eb91c98 648 if (ret < 0) {
9f32e9bf
MD
649 if (errno != EINTR) {
650 PERROR("poll wait");
651 }
5eb91c98
DG
652 goto error;
653 }
654
22a73671
YL
655 active_fd_count = ret;
656
9ddba525 657 /*
3e75a9b7
JG
658 * Move all active pollfd structs to the beginning of the
659 * array to emulate compat-epoll behaviour.
9ddba525 660 */
22a73671
YL
661 if (active_fd_count == events->wait.nb_fd) {
662 goto end;
663 }
22a73671 664
3e75a9b7
JG
665 while (consecutive_entries != active_fd_count) {
666 struct pollfd *current = &events->wait.events[pos];
667 struct pollfd idle_entry;
22a73671 668
3e75a9b7
JG
669 if (current->revents != 0) {
670 consecutive_entries++;
671 pos++;
672 continue;
22a73671 673 }
22a73671 674
3e75a9b7
JG
675 non_idle_pos = pos;
676
677 /* Look for next non-idle entry. */
678 while (events->wait.events[++non_idle_pos].revents == 0);
679
680 /* Swap idle and non-idle entries. */
681 idle_entry = *current;
682 *current = events->wait.events[non_idle_pos];
683 events->wait.events[non_idle_pos] = idle_entry;
684
685 consecutive_entries++;
686 pos++;
687 }
22a73671
YL
688end:
689 return ret;
5eb91c98
DG
690
691error:
692 return -1;
693}
694
695/*
696 * Setup poll set maximum size.
697 */
dbe23f45 698int compat_poll_set_max_size(void)
5eb91c98 699{
dbe23f45 700 int ret, retval = 0;
5eb91c98
DG
701 struct rlimit lim;
702
5eb91c98
DG
703 ret = getrlimit(RLIMIT_NOFILE, &lim);
704 if (ret < 0) {
6f04ed72 705 PERROR("getrlimit poll RLIMIT_NOFILE");
dbe23f45
MD
706 retval = -1;
707 goto end;
5eb91c98
DG
708 }
709
710 poll_max_size = lim.rlim_cur;
dbe23f45 711end:
d21b0d71 712 if (poll_max_size == 0) {
990570ed 713 poll_max_size = DEFAULT_POLL_SIZE;
5eb91c98 714 }
5eb91c98 715 DBG("poll set max size set to %u", poll_max_size);
dbe23f45 716 return retval;
5eb91c98 717}
0060607b
MJ
718
719#endif /* !HAVE_EPOLL */
This page took 0.107877 seconds and 4 git commands to generate.