Move to kernel style SPDX license identifiers
[lttng-ust.git] / libringbuffer / shm.c
index 995665ae16f4ee0b4836b3ffaf047bfb76ec3193..3dbb9f4928a6d81b43c01fc1dbd4dbd3620ebf0c 100644 (file)
@@ -1,27 +1,15 @@
 /*
- * libringbuffer/shm.c
+ * SPDX-License-Identifier: LGPL-2.1-only
  *
  * Copyright (C) 2005-2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; only
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#define _LGPL_SOURCE
 #include "shm.h"
 #include <unistd.h>
 #include <fcntl.h>
 #include <sys/mman.h>
+#include <sys/types.h>
 #include <sys/stat.h>  /* For mode constants */
 #include <fcntl.h>     /* For O_* constants */
 #include <assert.h>
 #include <signal.h>
 #include <dirent.h>
 #include <lttng/align.h>
-#include <helper.h>
 #include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#ifdef HAVE_LIBNUMA
+#include <numa.h>
+#include <numaif.h>
+#endif
 #include <helper.h>
+#include <ust-fd.h>
+#include "mmap.h"
 
 /*
  * Ensure we have the required amount of space available by writing 0
@@ -77,20 +72,23 @@ struct shm_object_table *shm_object_table_create(size_t max_nb_obj)
 
        table = zmalloc(sizeof(struct shm_object_table) +
                        max_nb_obj * sizeof(table->objects[0]));
+       if (!table)
+               return NULL;
        table->size = max_nb_obj;
        return table;
 }
 
 static
 struct shm_object *_shm_object_table_alloc_shm(struct shm_object_table *table,
-                                          size_t memory_map_size)
+                                          size_t memory_map_size,
+                                          int stream_fd)
 {
-       int shmfd, waitfd[2], ret, i, sigblocked = 0;
+       int shmfd, waitfd[2], ret, i;
        struct shm_object *obj;
        char *memory_map;
-       char tmp_name[NAME_MAX] = "/ust-shm-tmp-XXXXXX";
-       sigset_t all_sigs, orig_sigs;
 
+       if (stream_fd < 0)
+               return NULL;
        if (table->allocated_len >= table->size)
                return NULL;
        obj = &table->objects[table->allocated_len];
@@ -116,73 +114,44 @@ struct shm_object *_shm_object_table_alloc_shm(struct shm_object_table *table,
        }
        memcpy(obj->wait_fd, waitfd, sizeof(waitfd));
 
-       /* shm_fd: create shm */
-
        /*
-        * Theoretically, we could leak a shm if the application crashes
-        * between open and unlink. Disable signals on this thread for
-        * increased safety against this scenario.
+        * Set POSIX shared memory object size
+        *
+        * First, use ftruncate() to set its size, some implementations won't
+        * allow writes past the size set by ftruncate.
+        * Then, use write() to fill it with zeros, this allows us to fully
+        * allocate it and detect a shortage of shm space without dealing with
+        * a SIGBUS.
         */
-       sigfillset(&all_sigs);
-       ret = pthread_sigmask(SIG_BLOCK, &all_sigs, &orig_sigs);
-       if (ret == -1) {
-               PERROR("pthread_sigmask");
-               goto error_pthread_sigmask;
-       }
-       sigblocked = 1;
 
-       /*
-        * Allocate shm, and immediately unlink its shm oject, keeping
-        * only the file descriptor as a reference to the object. If it
-        * already exists (caused by short race window during which the
-        * global object exists in a concurrent shm_open), simply retry.
-        * We specifically do _not_ use the / at the beginning of the
-        * pathname so that some OS implementations can keep it local to
-        * the process (POSIX leaves this implementation-defined).
-        */
-       do {
-               /*
-                * Using mktemp filename with O_CREAT | O_EXCL open
-                * flags.
-                */
-               (void) mktemp(tmp_name);
-               if (tmp_name[0] == '\0') {
-                       PERROR("mktemp");
-                       goto error_shm_open;
-               }
-               shmfd = shm_open(tmp_name,
-                                O_CREAT | O_EXCL | O_RDWR, 0700);
-       } while (shmfd < 0 && (errno == EEXIST || errno == EACCES));
-       if (shmfd < 0) {
-               PERROR("shm_open");
-               goto error_shm_open;
-       }
-       ret = shm_unlink(tmp_name);
-       if (ret < 0 && errno != ENOENT) {
-               PERROR("shm_unlink");
-               goto error_shm_release;
-       }
-       sigblocked = 0;
-       ret = pthread_sigmask(SIG_SETMASK, &orig_sigs, NULL);
-       if (ret == -1) {
-               PERROR("pthread_sigmask");
-               goto error_sigmask_release;
+       shmfd = stream_fd;
+       ret = ftruncate(shmfd, memory_map_size);
+       if (ret) {
+               PERROR("ftruncate");
+               goto error_ftruncate;
        }
        ret = zero_file(shmfd, memory_map_size);
        if (ret) {
                PERROR("zero_file");
                goto error_zero_file;
        }
-       ret = ftruncate(shmfd, memory_map_size);
-       if (ret) {
-               PERROR("ftruncate");
-               goto error_ftruncate;
+
+       /*
+        * Also ensure the file metadata is synced with the storage by using
+        * fsync(2). Some platforms don't allow fsync on POSIX shm fds, ignore
+        * EINVAL accordingly.
+        */
+       ret = fsync(shmfd);
+       if (ret && errno != EINVAL) {
+               PERROR("fsync");
+               goto error_fsync;
        }
+       obj->shm_fd_ownership = 0;
        obj->shm_fd = shmfd;
 
        /* memory_map: mmap */
        memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE,
-                         MAP_SHARED, shmfd, 0);
+                         MAP_SHARED | LTTNG_MAP_POPULATE, shmfd, 0);
        if (memory_map == MAP_FAILED) {
                PERROR("mmap");
                goto error_mmap;
@@ -196,23 +165,9 @@ struct shm_object *_shm_object_table_alloc_shm(struct shm_object_table *table,
        return obj;
 
 error_mmap:
+error_fsync:
 error_ftruncate:
-error_shm_release:
 error_zero_file:
-error_sigmask_release:
-       ret = close(shmfd);
-       if (ret) {
-               PERROR("close");
-               assert(0);
-       }
-error_shm_open:
-       if (sigblocked) {
-               ret = pthread_sigmask(SIG_SETMASK, &orig_sigs, NULL);
-               if (ret == -1) {
-                       PERROR("pthread_sigmask");
-               }
-       }
-error_pthread_sigmask:
 error_fcntl:
        for (i = 0; i < 2; i++) {
                ret = close(waitfd[i]);
@@ -264,6 +219,7 @@ struct shm_object *_shm_object_table_alloc_mem(struct shm_object_table *table,
 
        /* no shm_fd */
        obj->shm_fd = -1;
+       obj->shm_fd_ownership = 0;
 
        obj->type = SHM_OBJECT_MEM;
        obj->memory_map = memory_map;
@@ -287,19 +243,63 @@ alloc_error:
        return NULL;
 }
 
+/*
+ * libnuma prints errors on the console even for numa_available().
+ * Work-around this limitation by using get_mempolicy() directly to
+ * check whether the kernel supports mempolicy.
+ */
+#ifdef HAVE_LIBNUMA
+static bool lttng_is_numa_available(void)
+{
+       int ret;
+
+       ret = get_mempolicy(NULL, NULL, 0, NULL, 0);
+       if (ret && errno == ENOSYS) {
+               return false;
+       }
+       return numa_available() > 0;
+}
+#endif
+
 struct shm_object *shm_object_table_alloc(struct shm_object_table *table,
                        size_t memory_map_size,
-                       enum shm_object_type type)
+                       enum shm_object_type type,
+                       int stream_fd,
+                       int cpu)
 {
+       struct shm_object *shm_object;
+#ifdef HAVE_LIBNUMA
+       int oldnode = 0, node;
+       bool numa_avail;
+
+       numa_avail = lttng_is_numa_available();
+       if (numa_avail) {
+               oldnode = numa_preferred();
+               if (cpu >= 0) {
+                       node = numa_node_of_cpu(cpu);
+                       if (node >= 0)
+                               numa_set_preferred(node);
+               }
+               if (cpu < 0 || node < 0)
+                       numa_set_localalloc();
+       }
+#endif /* HAVE_LIBNUMA */
        switch (type) {
        case SHM_OBJECT_SHM:
-               return _shm_object_table_alloc_shm(table, memory_map_size);
+               shm_object = _shm_object_table_alloc_shm(table, memory_map_size,
+                               stream_fd);
+               break;
        case SHM_OBJECT_MEM:
-               return _shm_object_table_alloc_mem(table, memory_map_size);
+               shm_object = _shm_object_table_alloc_mem(table, memory_map_size);
+               break;
        default:
                assert(0);
        }
-       return NULL;
+#ifdef HAVE_LIBNUMA
+       if (numa_avail)
+               numa_set_preferred(oldnode);
+#endif /* HAVE_LIBNUMA */
+       return shm_object;
 }
 
 struct shm_object *shm_object_table_append_shm(struct shm_object_table *table,
@@ -322,12 +322,8 @@ struct shm_object *shm_object_table_append_shm(struct shm_object_table *table,
        obj->wait_fd[0] = -1;   /* read end is unset */
        obj->wait_fd[1] = wakeup_fd;
        obj->shm_fd = shm_fd;
+       obj->shm_fd_ownership = 1;
 
-       ret = fcntl(obj->wait_fd[1], F_SETFD, FD_CLOEXEC);
-       if (ret < 0) {
-               PERROR("fcntl");
-               goto error_fcntl;
-       }
        /* The write end of the pipe needs to be non-blocking */
        ret = fcntl(obj->wait_fd[1], F_SETFL, O_NONBLOCK);
        if (ret < 0) {
@@ -337,7 +333,7 @@ struct shm_object *shm_object_table_append_shm(struct shm_object_table *table,
 
        /* memory_map: mmap */
        memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE,
-                         MAP_SHARED, shm_fd, 0);
+                         MAP_SHARED | LTTNG_MAP_POPULATE, shm_fd, 0);
        if (memory_map == MAP_FAILED) {
                PERROR("mmap");
                goto error_mmap;
@@ -371,6 +367,7 @@ struct shm_object *shm_object_table_append_mem(struct shm_object_table *table,
        obj->wait_fd[0] = -1;   /* read end is unset */
        obj->wait_fd[1] = wakeup_fd;
        obj->shm_fd = -1;
+       obj->shm_fd_ownership = 0;
 
        ret = fcntl(obj->wait_fd[1], F_SETFD, FD_CLOEXEC);
        if (ret < 0) {
@@ -397,7 +394,7 @@ error_fcntl:
 }
 
 static
-void shmp_object_destroy(struct shm_object *obj)
+void shmp_object_destroy(struct shm_object *obj, int consumer)
 {
        switch (obj->type) {
        case SHM_OBJECT_SHM:
@@ -409,18 +406,46 @@ void shmp_object_destroy(struct shm_object *obj)
                        PERROR("umnmap");
                        assert(0);
                }
-               ret = close(obj->shm_fd);
-               if (ret) {
-                       PERROR("close");
-                       assert(0);
+
+               if (obj->shm_fd_ownership) {
+                       /* Delete FDs only if called from app (not consumer). */
+                       if (!consumer) {
+                               lttng_ust_lock_fd_tracker();
+                               ret = close(obj->shm_fd);
+                               if (!ret) {
+                                       lttng_ust_delete_fd_from_tracker(obj->shm_fd);
+                               } else {
+                                       PERROR("close");
+                                       assert(0);
+                               }
+                               lttng_ust_unlock_fd_tracker();
+                       } else {
+                               ret = close(obj->shm_fd);
+                               if (ret) {
+                                       PERROR("close");
+                                       assert(0);
+                               }
+                       }
                }
                for (i = 0; i < 2; i++) {
                        if (obj->wait_fd[i] < 0)
                                continue;
-                       ret = close(obj->wait_fd[i]);
-                       if (ret) {
-                               PERROR("close");
-                               assert(0);
+                       if (!consumer) {
+                               lttng_ust_lock_fd_tracker();
+                               ret = close(obj->wait_fd[i]);
+                               if (!ret) {
+                                       lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]);
+                               } else {
+                                       PERROR("close");
+                                       assert(0);
+                               }
+                               lttng_ust_unlock_fd_tracker();
+                       } else {
+                               ret = close(obj->wait_fd[i]);
+                               if (ret) {
+                                       PERROR("close");
+                                       assert(0);
+                               }
                        }
                }
                break;
@@ -432,10 +457,22 @@ void shmp_object_destroy(struct shm_object *obj)
                for (i = 0; i < 2; i++) {
                        if (obj->wait_fd[i] < 0)
                                continue;
-                       ret = close(obj->wait_fd[i]);
-                       if (ret) {
-                               PERROR("close");
-                               assert(0);
+                       if (!consumer) {
+                               lttng_ust_lock_fd_tracker();
+                               ret = close(obj->wait_fd[i]);
+                               if (!ret) {
+                                       lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]);
+                               } else {
+                                       PERROR("close");
+                                       assert(0);
+                               }
+                               lttng_ust_unlock_fd_tracker();
+                       } else {
+                               ret = close(obj->wait_fd[i]);
+                               if (ret) {
+                                       PERROR("close");
+                                       assert(0);
+                               }
                        }
                }
                free(obj->memory_map);
@@ -446,12 +483,12 @@ void shmp_object_destroy(struct shm_object *obj)
        }
 }
 
-void shm_object_table_destroy(struct shm_object_table *table)
+void shm_object_table_destroy(struct shm_object_table *table, int consumer)
 {
        int i;
 
        for (i = 0; i < table->allocated_len; i++)
-               shmp_object_destroy(&table->objects[i]);
+               shmp_object_destroy(&table->objects[i], consumer);
        free(table);
 }
 
@@ -477,6 +514,6 @@ struct shm_ref zalloc_shm(struct shm_object *obj, size_t len)
 
 void align_shm(struct shm_object *obj, size_t align)
 {
-       size_t offset_len = offset_align(obj->allocated_len, align);
+       size_t offset_len = lttng_ust_offset_align(obj->allocated_len, align);
        obj->allocated_len += offset_len;
 }
This page took 0.026943 seconds and 4 git commands to generate.