2 * Copyright (C) 2018 - Jérémie Galarneau <jeremie.galarneau@efficios.com>
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License, version 2 only, as
6 * published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 51
15 * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 #include <urcu/list.h>
20 #include <urcu/rculfhash.h>
23 #include <sys/types.h>
29 #include "common/macros.h"
30 #include "common/error.h"
31 #include "common/defaults.h"
32 #include "common/hashtable/utils.h"
33 #include "common/hashtable/hashtable.h"
35 #include "fd-tracker.h"
/*
 * Total number of file descriptors accounted for by the tracker: active
 * suspendable handles, suspended suspendable handles and unsuspendable fds.
 *
 * Tracker lock must be taken by the user.
 *
 * The argument is parenthesized so that any pointer expression (e.g. `&t`)
 * expands correctly. It is evaluated more than once: do not pass an
 * expression with side effects.
 */
#define TRACKED_COUNT(tracker)                     \
	((tracker)->count.suspendable.active +     \
	(tracker)->count.suspendable.suspended +   \
	(tracker)->count.unsuspendable)
/*
 * Number of file descriptors currently counted as open by the tracker:
 * active suspendable handles plus unsuspendable fds.
 *
 * Tracker lock must be taken by the user.
 *
 * The argument is parenthesized so that any pointer expression expands
 * correctly. It is evaluated more than once: do not pass an expression with
 * side effects.
 */
#define ACTIVE_COUNT(tracker)                   \
	((tracker)->count.suspendable.active +  \
	(tracker)->count.unsuspendable)
/*
 * Number of suspendable handles that are currently suspended.
 *
 * Tracker lock must be taken by the user.
 *
 * The argument is parenthesized so that any pointer expression expands
 * correctly.
 */
#define SUSPENDED_COUNT(tracker) \
	((tracker)->count.suspendable.suspended)
/*
 * Number of suspendable handles, whether active or suspended.
 *
 * Tracker lock must be taken by the user.
 *
 * The argument is parenthesized so that any pointer expression expands
 * correctly. It is evaluated more than once: do not pass an expression with
 * side effects.
 */
#define SUSPENDABLE_COUNT(tracker)              \
	((tracker)->count.suspendable.active +  \
	(tracker)->count.suspendable.suspended)
/*
 * Number of unsuspendable file descriptors accounted for by the tracker.
 *
 * Tracker lock must be taken by the user.
 *
 * The argument is parenthesized so that any pointer expression expands
 * correctly.
 */
#define UNSUSPENDABLE_COUNT(tracker) \
	((tracker)->count.unsuspendable)
66 unsigned int suspended
;
68 unsigned int unsuspendable
;
70 unsigned int capacity
;
74 /* Failures to suspend or restore fs handles. */
78 * The head of the active_handles list is always the least recently
79 * used active handle. When a handle is used, it is removed from the
80 * list and added to the end. When a file has to be suspended, the
81 * first element in the list is "popped", suspended, and added to the
82 * list of suspended handles.
84 struct cds_list_head active_handles
;
85 struct cds_list_head suspended_handles
;
86 struct cds_lfht
*unsuspendable_fds
;
89 struct open_properties
{
99 * A fs_handle is not ref-counted. Therefore, it is assumed that a
100 * handle is never in-use while it is being reclaimed. It can be
101 * shared by multiple threads, but external synchronization is required
102 * to ensure it is not still being used when it is reclaimed (close method).
103 * In this respect, it is not different from a regular file descriptor.
105 * The fs_handle lock always nests _within_ the tracker's lock.
108 pthread_mutex_t lock
;
110 * Weak reference to the tracker. All fs_handles are assumed to have
111 * been closed at the moment of the destruction of the fd_tracker.
113 struct fd_tracker
*tracker
;
114 struct open_properties properties
;
116 /* inode number of the file at the time of the handle's creation. */
119 /* Offset to which the file should be restored. */
121 struct cds_list_head handles_list_node
;
124 struct unsuspendable_fd
{
126 * Accesses are only performed through the tracker, which is protected
131 struct cds_lfht_node tracker_node
;
132 struct rcu_head rcu_head
;
136 pthread_mutex_t lock
;
140 .lock
= PTHREAD_MUTEX_INITIALIZER
,
143 static int match_fd(struct cds_lfht_node
*node
, const void *key
);
144 static void unsuspendable_fd_destroy(struct unsuspendable_fd
*entry
);
145 static struct unsuspendable_fd
*unsuspendable_fd_create(const char *name
,
147 static int open_from_properties(struct open_properties
*properties
);
149 static void fs_handle_log(struct fs_handle
*handle
);
150 static int fs_handle_suspend(struct fs_handle
*handle
);
151 static int fs_handle_restore(struct fs_handle
*handle
);
153 static void fd_tracker_track(struct fd_tracker
*tracker
,
154 struct fs_handle
*handle
);
155 static void fd_tracker_untrack(struct fd_tracker
*tracker
,
156 struct fs_handle
*handle
);
157 static int fd_tracker_suspend_handles(struct fd_tracker
*tracker
,
159 static int fd_tracker_restore_handle(struct fd_tracker
*tracker
,
160 struct fs_handle
*handle
);
162 /* Match function of the tracker's unsuspendable_fds hash table. */
164 int match_fd(struct cds_lfht_node
*node
, const void *key
)
166 struct unsuspendable_fd
*entry
=
167 caa_container_of(node
, struct unsuspendable_fd
, tracker_node
);
169 return hash_match_key_ulong((void *) (unsigned long) entry
->fd
,
174 void delete_unsuspendable_fd(struct rcu_head
*head
)
176 struct unsuspendable_fd
*fd
= caa_container_of(head
,
177 struct unsuspendable_fd
, rcu_head
);
184 void unsuspendable_fd_destroy(struct unsuspendable_fd
*entry
)
189 call_rcu(&entry
->rcu_head
, delete_unsuspendable_fd
);
193 struct unsuspendable_fd
*unsuspendable_fd_create(const char *name
, int fd
)
195 struct unsuspendable_fd
*entry
=
196 zmalloc(sizeof(*entry
));
202 entry
->name
= strdup(name
);
207 cds_lfht_node_init(&entry
->tracker_node
);
211 unsuspendable_fd_destroy(entry
);
216 void fs_handle_log(struct fs_handle
*handle
)
218 pthread_mutex_lock(&handle
->lock
);
219 if (handle
->fd
>= 0) {
220 DBG_NO_LOC(" %s [active, fd %d%s]",
221 handle
->properties
.path
,
223 handle
->in_use
? ", in use" : "");
225 DBG_NO_LOC(" %s [suspended]", handle
->properties
.path
);
227 pthread_mutex_unlock(&handle
->lock
);
231 int fs_handle_suspend(struct fs_handle
*handle
)
236 pthread_mutex_lock(&handle
->lock
);
237 assert(handle
->fd
>= 0);
238 if (handle
->in_use
) {
239 /* This handle can't be suspended as it is currently in use. */
244 ret
= stat(handle
->properties
.path
, &fs_stat
);
246 PERROR("Filesystem handle to %s cannot be suspended as stat() failed",
247 handle
->properties
.path
);
252 if (fs_stat
.st_ino
!= handle
->ino
) {
253 /* Don't suspend as the handle would not be restorable. */
254 WARN("Filesystem handle to %s cannot be suspended as its inode changed",
255 handle
->properties
.path
);
260 handle
->offset
= lseek(handle
->fd
, 0, SEEK_CUR
);
261 if (handle
->offset
== -1) {
262 WARN("Filesystem handle to %s cannot be suspended as lseek() failed to sample its current position",
263 handle
->properties
.path
);
268 ret
= close(handle
->fd
);
270 PERROR("Filesystem handle to %s cannot be suspended as close() failed",
271 handle
->properties
.path
);
275 DBG("Suspended filesystem handle to %s (fd %i) at position %" PRId64
,
276 handle
->properties
.path
, handle
->fd
, handle
->offset
);
280 handle
->tracker
->stats
.errors
++;
282 pthread_mutex_unlock(&handle
->lock
);
286 /* Caller must hold the tracker and handle's locks. */
288 int fs_handle_restore(struct fs_handle
*handle
)
292 assert(handle
->fd
== -1);
293 ret
= open_from_properties(&handle
->properties
);
295 PERROR("Failed to restore filesystem handle to %s, open() failed",
296 handle
->properties
.path
);
302 ret
= lseek(fd
, handle
->offset
, SEEK_SET
);
304 PERROR("Failed to restore filesystem handle to %s, lseek() failed",
305 handle
->properties
.path
);
309 DBG("Restored filesystem handle to %s (fd %i) at position %" PRId64
,
310 handle
->properties
.path
, fd
, handle
->offset
);
322 int open_from_properties(struct open_properties
*properties
)
327 * open() ignores the 'mode' parameter unless the O_CREAT or O_TMPFILE
328 * flags are set. O_TMPFILE would not make sense in the context of a
329 * suspendable fs_handle as it would not be restorable (see OPEN(2)),
330 * thus it is ignored here.
332 if ((properties
->flags
& O_CREAT
) && properties
->mode
.is_set
) {
333 ret
= open(properties
->path
, properties
->flags
,
334 properties
->mode
.value
);
336 ret
= open(properties
->path
, properties
->flags
);
339 * Some flags should not be used beyond the initial open() of a
340 * restorable file system handle. O_CREAT and O_TRUNC must
341 * be cleared since it would be unexpected to re-use them
342 * when the handle is restored:
343 * - O_CREAT should not be needed as the file has been created
344 * on the initial call to open(),
345 * - O_TRUNC would destroy the file's contents by truncating it
348 properties
->flags
&= ~(O_CREAT
| O_TRUNC
);
357 struct fd_tracker
*fd_tracker_create(unsigned int capacity
)
359 struct fd_tracker
*tracker
= zmalloc(sizeof(struct fd_tracker
));
365 pthread_mutex_lock(&seed
.lock
);
366 if (!seed
.initialized
) {
367 seed
.value
= (unsigned long) time(NULL
);
368 seed
.initialized
= true;
370 pthread_mutex_unlock(&seed
.lock
);
372 CDS_INIT_LIST_HEAD(&tracker
->active_handles
);
373 CDS_INIT_LIST_HEAD(&tracker
->suspended_handles
);
374 tracker
->capacity
= capacity
;
375 tracker
->unsuspendable_fds
= cds_lfht_new(DEFAULT_HT_SIZE
, 1, 0,
376 CDS_LFHT_AUTO_RESIZE
| CDS_LFHT_ACCOUNTING
, NULL
);
377 DBG("File descriptor tracker created with a limit of %u simultaneously-opened FDs",
383 void fd_tracker_log(struct fd_tracker
*tracker
)
385 struct fs_handle
*handle
;
386 struct unsuspendable_fd
*unsuspendable_fd
;
387 struct cds_lfht_iter iter
;
389 pthread_mutex_lock(&tracker
->lock
);
390 DBG_NO_LOC("File descriptor tracker");
391 DBG_NO_LOC(" Stats:");
392 DBG_NO_LOC(" uses: %" PRIu64
, tracker
->stats
.uses
);
393 DBG_NO_LOC(" misses: %" PRIu64
, tracker
->stats
.misses
);
394 DBG_NO_LOC(" errors: %" PRIu64
, tracker
->stats
.errors
);
395 DBG_NO_LOC(" Tracked: %u", TRACKED_COUNT(tracker
));
396 DBG_NO_LOC(" active: %u", ACTIVE_COUNT(tracker
));
397 DBG_NO_LOC(" suspendable: %u", SUSPENDABLE_COUNT(tracker
));
398 DBG_NO_LOC(" unsuspendable: %u", UNSUSPENDABLE_COUNT(tracker
));
399 DBG_NO_LOC(" suspended: %u", SUSPENDED_COUNT(tracker
));
400 DBG_NO_LOC(" capacity: %u", tracker
->capacity
);
402 DBG_NO_LOC(" Tracked suspendable file descriptors");
403 cds_list_for_each_entry(handle
, &tracker
->active_handles
,
405 fs_handle_log(handle
);
407 cds_list_for_each_entry(handle
, &tracker
->suspended_handles
,
409 fs_handle_log(handle
);
411 if (!SUSPENDABLE_COUNT(tracker
)) {
415 DBG_NO_LOC(" Tracked unsuspendable file descriptors");
417 cds_lfht_for_each_entry(tracker
->unsuspendable_fds
, &iter
,
418 unsuspendable_fd
, tracker_node
) {
419 DBG_NO_LOC(" %s [active, fd %d]", unsuspendable_fd
->name
? : "Unnamed",
420 unsuspendable_fd
->fd
);
423 if (!UNSUSPENDABLE_COUNT(tracker
)) {
427 pthread_mutex_unlock(&tracker
->lock
);
430 int fd_tracker_destroy(struct fd_tracker
*tracker
)
435 * Refuse to destroy the tracker as fs_handles may still hold
436 * weak references to the tracker.
438 pthread_mutex_lock(&tracker
->lock
);
439 if (TRACKED_COUNT(tracker
)) {
440 ERR("A file descriptor leak has been detected: %u tracked file descriptors are still being tracked",
441 TRACKED_COUNT(tracker
));
442 pthread_mutex_unlock(&tracker
->lock
);
443 fd_tracker_log(tracker
);
447 pthread_mutex_unlock(&tracker
->lock
);
449 ret
= cds_lfht_destroy(tracker
->unsuspendable_fds
, NULL
);
451 pthread_mutex_destroy(&tracker
->lock
);
457 struct fs_handle
*fd_tracker_open_fs_handle(struct fd_tracker
*tracker
,
458 const char *path
, int flags
, mode_t
*mode
)
461 struct fs_handle
*handle
= NULL
;
463 struct open_properties properties
= {
464 .path
= strdup(path
),
466 .mode
.is_set
= !!mode
,
467 .mode
.value
= mode
? *mode
: 0,
470 if (!properties
.path
) {
474 pthread_mutex_lock(&tracker
->lock
);
475 if (ACTIVE_COUNT(tracker
) == tracker
->capacity
) {
476 if (tracker
->count
.suspendable
.active
> 0) {
477 ret
= fd_tracker_suspend_handles(tracker
, 1);
483 * There are not enough active suspendable file
484 * descriptors to open a new fd and still accommodate the
485 * tracker's capacity.
487 WARN("Cannot open file system handle, too many unsuspendable file descriptors are opened (%u)",
488 tracker
->count
.unsuspendable
);
494 handle
= zmalloc(sizeof(*handle
));
499 ret
= pthread_mutex_init(&handle
->lock
, NULL
);
501 PERROR("Failed to initialize handle mutex while creating fs handle");
506 handle
->fd
= open_from_properties(&properties
);
507 if (handle
->fd
< 0) {
508 PERROR("Failed to open fs handle to %s, open() returned", path
);
513 handle
->properties
= properties
;
514 properties
.path
= NULL
;
516 if (fstat(handle
->fd
, &fd_stat
)) {
517 PERROR("Failed to retrieve file descriptor inode while creating fs handle, fstat() returned");
521 handle
->ino
= fd_stat
.st_ino
;
523 fd_tracker_track(tracker
, handle
);
524 handle
->tracker
= tracker
;
525 pthread_mutex_unlock(&tracker
->lock
);
527 free(properties
.path
);
530 pthread_mutex_unlock(&tracker
->lock
);
531 (void) fs_handle_close(handle
);
536 /* Caller must hold the tracker's lock. */
538 int fd_tracker_suspend_handles(struct fd_tracker
*tracker
,
541 unsigned int left_to_close
= count
;
542 struct fs_handle
*handle
, *tmp
;
544 cds_list_for_each_entry_safe(handle
, tmp
, &tracker
->active_handles
,
548 fd_tracker_untrack(tracker
, handle
);
549 ret
= fs_handle_suspend(handle
);
550 fd_tracker_track(tracker
, handle
);
555 if (!left_to_close
) {
559 return left_to_close
? -EMFILE
: 0;
562 int fd_tracker_open_unsuspendable_fd(struct fd_tracker
*tracker
,
563 int *out_fds
, const char **names
, unsigned int fd_count
,
564 fd_open_cb open
, void *user_data
)
566 int ret
, user_ret
, i
, fds_to_suspend
;
567 unsigned int active_fds
;
568 struct unsuspendable_fd
*entries
[fd_count
];
570 memset(entries
, 0, sizeof(entries
));
572 pthread_mutex_lock(&tracker
->lock
);
574 active_fds
= ACTIVE_COUNT(tracker
);
575 fds_to_suspend
= (int) active_fds
+ (int) fd_count
- (int) tracker
->capacity
;
576 if (fds_to_suspend
> 0) {
577 if (fds_to_suspend
<= tracker
->count
.suspendable
.active
) {
578 ret
= fd_tracker_suspend_handles(tracker
, fds_to_suspend
);
584 * There are not enough active suspendable file
585 * descriptors to open a new fd and still accommodate the
586 * tracker's capacity.
588 WARN("Cannot open unsuspendable fd, too many unsuspendable file descriptors are opened (%u)",
589 tracker
->count
.unsuspendable
);
595 user_ret
= open(user_data
, out_fds
);
602 * Add the fds returned by the user's callback to the hashtable
603 * of unsuspendable fds.
605 for (i
= 0; i
< fd_count
; i
++) {
606 struct unsuspendable_fd
*entry
=
607 unsuspendable_fd_create(names
? names
[i
] : NULL
,
612 goto end_free_entries
;
618 for (i
= 0; i
< fd_count
; i
++) {
619 struct cds_lfht_node
*node
;
620 struct unsuspendable_fd
*entry
= entries
[i
];
622 node
= cds_lfht_add_unique(
623 tracker
->unsuspendable_fds
,
624 hash_key_ulong((void *) (unsigned long) out_fds
[i
],
627 (void *) (unsigned long) out_fds
[i
],
628 &entry
->tracker_node
);
630 if (node
!= &entry
->tracker_node
) {
633 goto end_free_entries
;
637 tracker
->count
.unsuspendable
+= fd_count
;
641 pthread_mutex_unlock(&tracker
->lock
);
644 for (i
= 0; i
< fd_count
; i
++) {
645 unsuspendable_fd_destroy(entries
[i
]);
650 int fd_tracker_close_unsuspendable_fd(struct fd_tracker
*tracker
,
651 int *fds_in
, unsigned int fd_count
, fd_close_cb close
,
654 int i
, ret
, user_ret
;
658 * Maintain a local copy of fds_in as the user's callback may modify its
659 * contents (e.g. setting the fd(s) to -1 after close).
661 memcpy(fds
, fds_in
, sizeof(*fds
) * fd_count
);
663 pthread_mutex_lock(&tracker
->lock
);
666 /* Let the user close the file descriptors. */
667 user_ret
= close(user_data
, fds_in
);
673 /* Untrack the fds that were just closed by the user's callback. */
674 for (i
= 0; i
< fd_count
; i
++) {
675 struct cds_lfht_node
*node
;
676 struct cds_lfht_iter iter
;
677 struct unsuspendable_fd
*entry
;
679 cds_lfht_lookup(tracker
->unsuspendable_fds
,
680 hash_key_ulong((void *) (unsigned long) fds
[i
],
683 (void *) (unsigned long) fds
[i
],
685 node
= cds_lfht_iter_get_node(&iter
);
687 /* Unknown file descriptor. */
688 WARN("Untracked file descriptor %d passed to fd_tracker_close_unsuspendable_fd()",
693 entry
= caa_container_of(node
,
694 struct unsuspendable_fd
,
697 cds_lfht_del(tracker
->unsuspendable_fds
, node
);
698 unsuspendable_fd_destroy(entry
);
702 tracker
->count
.unsuspendable
-= fd_count
;
706 pthread_mutex_unlock(&tracker
->lock
);
710 /* Caller must have taken the tracker's and handle's locks. */
712 void fd_tracker_track(struct fd_tracker
*tracker
, struct fs_handle
*handle
)
714 if (handle
->fd
>= 0) {
715 tracker
->count
.suspendable
.active
++;
716 cds_list_add_tail(&handle
->handles_list_node
,
717 &tracker
->active_handles
);
719 tracker
->count
.suspendable
.suspended
++;
720 cds_list_add_tail(&handle
->handles_list_node
,
721 &tracker
->suspended_handles
);
725 /* Caller must have taken the tracker's and handle's locks. */
727 void fd_tracker_untrack(struct fd_tracker
*tracker
, struct fs_handle
*handle
)
729 if (handle
->fd
>= 0) {
730 tracker
->count
.suspendable
.active
--;
732 tracker
->count
.suspendable
.suspended
--;
734 cds_list_del(&handle
->handles_list_node
);
737 /* Caller must have taken the tracker's and handle's locks. */
739 int fd_tracker_restore_handle(struct fd_tracker
*tracker
,
740 struct fs_handle
*handle
)
744 fd_tracker_untrack(tracker
, handle
);
745 if (ACTIVE_COUNT(tracker
) >= tracker
->capacity
) {
746 ret
= fd_tracker_suspend_handles(tracker
, 1);
751 ret
= fs_handle_restore(handle
);
753 fd_tracker_track(tracker
, handle
);
754 return ret
? ret
: handle
->fd
;
757 int fs_handle_get_fd(struct fs_handle
*handle
)
762 * TODO This should be optimized as it is a fairly hot path.
763 * The fd-tracker's lock should only be taken when a fs_handle is
764 * restored (slow path). On the fast path (fs_handle is active),
765 * the only effect on the fd_tracker is marking the handle as the
766 * most recently used. Currently, it is done by a call to the
767 * track/untrack helpers, but it should be done atomically.
769 * Note that the lock's nesting order must still be respected here.
770 * The handle's lock nests inside the tracker's lock.
772 pthread_mutex_lock(&handle
->tracker
->lock
);
773 pthread_mutex_lock(&handle
->lock
);
774 assert(!handle
->in_use
);
776 handle
->tracker
->stats
.uses
++;
777 if (handle
->fd
>= 0) {
779 /* Mark as most recently used. */
780 fd_tracker_untrack(handle
->tracker
, handle
);
781 fd_tracker_track(handle
->tracker
, handle
);
783 handle
->tracker
->stats
.misses
++;
784 ret
= fd_tracker_restore_handle(handle
->tracker
, handle
);
786 handle
->tracker
->stats
.errors
++;
790 handle
->in_use
= true;
792 pthread_mutex_unlock(&handle
->lock
);
793 pthread_mutex_unlock(&handle
->tracker
->lock
);
797 void fs_handle_put_fd(struct fs_handle
*handle
)
799 pthread_mutex_lock(&handle
->lock
);
800 handle
->in_use
= false;
801 pthread_mutex_unlock(&handle
->lock
);
804 int fs_handle_close(struct fs_handle
*handle
)
813 pthread_mutex_lock(&handle
->tracker
->lock
);
814 pthread_mutex_lock(&handle
->lock
);
815 fd_tracker_untrack(handle
->tracker
, handle
);
816 if (handle
->fd
>= 0) {
818 * The return value of close() is not propagated as there
819 * isn't much the user can do about it.
821 if (close(handle
->fd
)) {
822 PERROR("Failed to close the file descritptor (%d) of fs handle to %s, close() returned",
823 handle
->fd
, handle
->properties
.path
);
827 pthread_mutex_unlock(&handle
->lock
);
828 pthread_mutex_destroy(&handle
->lock
);
829 pthread_mutex_unlock(&handle
->tracker
->lock
);
830 free(handle
->properties
.path
);