numa support: allow disabling numa support
[lttng-ust.git] / libringbuffer / shm.c
index 90160ce1976c7c7c5a2853eca93229c9b3871460..0153578c93a2cfddf3bdd69ae57324b516086651 100644 (file)
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#define _LGPL_SOURCE
+#include <config.h>
 #include "shm.h"
 #include <unistd.h>
 #include <fcntl.h>
 #include <sys/mman.h>
+#include <sys/types.h>
 #include <sys/stat.h>  /* For mode constants */
 #include <fcntl.h>     /* For O_* constants */
 #include <assert.h>
 #include <signal.h>
 #include <dirent.h>
 #include <lttng/align.h>
-#include <helper.h>
 #include <limits.h>
+#ifdef HAVE_LIBNUMA
+#include <numa.h>
+#endif
 #include <helper.h>
+#include <ust-fd.h>
 
 /*
  * Ensure we have the required amount of space available by writing 0
@@ -77,20 +83,23 @@ struct shm_object_table *shm_object_table_create(size_t max_nb_obj)
 
        table = zmalloc(sizeof(struct shm_object_table) +
                        max_nb_obj * sizeof(table->objects[0]));
+       if (!table)
+               return NULL;
        table->size = max_nb_obj;
        return table;
 }
 
 static
 struct shm_object *_shm_object_table_alloc_shm(struct shm_object_table *table,
-                                          size_t memory_map_size)
+                                          size_t memory_map_size,
+                                          int stream_fd)
 {
-       int shmfd, waitfd[2], ret, i, sigblocked = 0;
+       int shmfd, waitfd[2], ret, i;
        struct shm_object *obj;
        char *memory_map;
-       char tmp_name[NAME_MAX] = "/ust-shm-tmp-XXXXXX";
-       sigset_t all_sigs, orig_sigs;
 
+       if (stream_fd < 0)
+               return NULL;
        if (table->allocated_len >= table->size)
                return NULL;
        obj = &table->objects[table->allocated_len];
@@ -116,58 +125,9 @@ struct shm_object *_shm_object_table_alloc_shm(struct shm_object_table *table,
        }
        memcpy(obj->wait_fd, waitfd, sizeof(waitfd));
 
-       /* shm_fd: create shm */
-
-       /*
-        * Theoretically, we could leak a shm if the application crashes
-        * between open and unlink. Disable signals on this thread for
-        * increased safety against this scenario.
-        */
-       sigfillset(&all_sigs);
-       ret = pthread_sigmask(SIG_BLOCK, &all_sigs, &orig_sigs);
-       if (ret == -1) {
-               PERROR("pthread_sigmask");
-               goto error_pthread_sigmask;
-       }
-       sigblocked = 1;
+       /* create shm */
 
-       /*
-        * Allocate shm, and immediately unlink its shm oject, keeping
-        * only the file descriptor as a reference to the object. If it
-        * already exists (caused by short race window during which the
-        * global object exists in a concurrent shm_open), simply retry.
-        * We specifically do _not_ use the / at the beginning of the
-        * pathname so that some OS implementations can keep it local to
-        * the process (POSIX leaves this implementation-defined).
-        */
-       do {
-               /*
-                * Using mktemp filename with O_CREAT | O_EXCL open
-                * flags.
-                */
-               mktemp(tmp_name);
-               if (tmp_name[0] == '\0') {
-                       PERROR("mktemp");
-                       goto error_shm_open;
-               }
-               shmfd = shm_open(tmp_name,
-                                O_CREAT | O_EXCL | O_RDWR, 0700);
-       } while (shmfd < 0 && (errno == EEXIST || errno == EACCES));
-       if (shmfd < 0) {
-               PERROR("shm_open");
-               goto error_shm_open;
-       }
-       ret = shm_unlink(tmp_name);
-       if (ret < 0 && errno != ENOENT) {
-               PERROR("shm_unlink");
-               goto error_shm_release;
-       }
-       sigblocked = 0;
-       ret = pthread_sigmask(SIG_SETMASK, &orig_sigs, NULL);
-       if (ret == -1) {
-               PERROR("pthread_sigmask");
-               goto error_sigmask_release;
-       }
+       shmfd = stream_fd;
        ret = zero_file(shmfd, memory_map_size);
        if (ret) {
                PERROR("zero_file");
@@ -178,6 +138,16 @@ struct shm_object *_shm_object_table_alloc_shm(struct shm_object_table *table,
                PERROR("ftruncate");
                goto error_ftruncate;
        }
+       /*
+        * Also ensure the file metadata is synced with the storage by using
+        * fsync(2).
+        */
+       ret = fsync(shmfd);
+       if (ret) {
+               PERROR("fsync");
+               goto error_fsync;
+       }
+       obj->shm_fd_ownership = 0;
        obj->shm_fd = shmfd;
 
        /* memory_map: mmap */
@@ -196,23 +166,9 @@ struct shm_object *_shm_object_table_alloc_shm(struct shm_object_table *table,
        return obj;
 
 error_mmap:
+error_fsync:
 error_ftruncate:
-error_shm_release:
 error_zero_file:
-error_sigmask_release:
-       ret = close(shmfd);
-       if (ret) {
-               PERROR("close");
-               assert(0);
-       }
-error_shm_open:
-       if (sigblocked) {
-               ret = pthread_sigmask(SIG_SETMASK, &orig_sigs, NULL);
-               if (ret == -1) {
-                       PERROR("pthread_sigmask");
-               }
-       }
-error_pthread_sigmask:
 error_fcntl:
        for (i = 0; i < 2; i++) {
                ret = close(waitfd[i]);
@@ -264,6 +220,7 @@ struct shm_object *_shm_object_table_alloc_mem(struct shm_object_table *table,
 
        /* no shm_fd */
        obj->shm_fd = -1;
+       obj->shm_fd_ownership = 0;
 
        obj->type = SHM_OBJECT_MEM;
        obj->memory_map = memory_map;
@@ -289,17 +246,38 @@ alloc_error:
 
 /*
  * Allocate one object in @table, dispatching on @type: SHM_OBJECT_SHM
  * goes to _shm_object_table_alloc_shm() (which also receives @stream_fd),
  * SHM_OBJECT_MEM goes to _shm_object_table_alloc_mem() (which does not).
  * Returns whatever the selected allocator returned.
  *
  * This comment describes the version formed by the unchanged and '+'
  * lines; '-' lines are the code being removed.
  *
  * When built with HAVE_LIBNUMA, the preferred NUMA node is changed for
  * the duration of the allocation: to numa_node_of_cpu(@cpu) when
  * @cpu >= 0 and that call returns a non-negative node, otherwise
  * numa_set_localalloc() is used. The value read from numa_preferred()
  * on entry is put back with numa_set_preferred() before returning.
  *
  * NOTE(review): no numa_available() check is visible here before the
  * other libnuma calls; confirm it is done elsewhere, the libnuma manual
  * requires it.
  */
 struct shm_object *shm_object_table_alloc(struct shm_object_table *table,
                        size_t memory_map_size,
-                       enum shm_object_type type)
+                       enum shm_object_type type,
+                       int stream_fd,
+                       int cpu)
 {
+       struct shm_object *shm_object;
+#ifdef HAVE_LIBNUMA
+       int oldnode, node;
+
+       oldnode = numa_preferred();
+       if (cpu >= 0) {
+               node = numa_node_of_cpu(cpu);
+               if (node >= 0)
+                       numa_set_preferred(node);
+       }
        /*
         * node is only assigned when cpu >= 0; the short-circuit on
         * "cpu < 0" keeps it from being read unassigned.
         */
+       if (cpu < 0 || node < 0)
+               numa_set_localalloc();
+#endif /* HAVE_LIBNUMA */
        switch (type) {
        case SHM_OBJECT_SHM:
-               return _shm_object_table_alloc_shm(table, memory_map_size);
+               shm_object = _shm_object_table_alloc_shm(table, memory_map_size,
+                               stream_fd);
+               break;
        case SHM_OBJECT_MEM:
-               return _shm_object_table_alloc_mem(table, memory_map_size);
+               shm_object = _shm_object_table_alloc_mem(table, memory_map_size);
+               break;
        /*
         * NOTE(review): this branch only asserts. If assert() is compiled
         * out (NDEBUG), shm_object reaches the return below without ever
         * being assigned; the removed code returned NULL in that case.
         */
        default:
                assert(0);
        }
-       return NULL;
+#ifdef HAVE_LIBNUMA
+       numa_set_preferred(oldnode);
+#endif /* HAVE_LIBNUMA */
+       return shm_object;
 }
 
 struct shm_object *shm_object_table_append_shm(struct shm_object_table *table,
@@ -322,6 +300,7 @@ struct shm_object *shm_object_table_append_shm(struct shm_object_table *table,
        obj->wait_fd[0] = -1;   /* read end is unset */
        obj->wait_fd[1] = wakeup_fd;
        obj->shm_fd = shm_fd;
+       obj->shm_fd_ownership = 1;
 
        ret = fcntl(obj->wait_fd[1], F_SETFD, FD_CLOEXEC);
        if (ret < 0) {
@@ -371,6 +350,7 @@ struct shm_object *shm_object_table_append_mem(struct shm_object_table *table,
        obj->wait_fd[0] = -1;   /* read end is unset */
        obj->wait_fd[1] = wakeup_fd;
        obj->shm_fd = -1;
+       obj->shm_fd_ownership = 0;
 
        ret = fcntl(obj->wait_fd[1], F_SETFD, FD_CLOEXEC);
        if (ret < 0) {
@@ -397,7 +377,7 @@ error_fcntl:
 }
 
 static
-void shmp_object_destroy(struct shm_object *obj)
+void shmp_object_destroy(struct shm_object *obj, int consumer)
 {
        switch (obj->type) {
        case SHM_OBJECT_SHM:
@@ -409,18 +389,46 @@ void shmp_object_destroy(struct shm_object *obj)
                        PERROR("umnmap");
                        assert(0);
                }
-               ret = close(obj->shm_fd);
-               if (ret) {
-                       PERROR("close");
-                       assert(0);
+
+               if (obj->shm_fd_ownership) {
+                       /* Delete FDs only if called from app (not consumer). */
+                       if (!consumer) {
+                               lttng_ust_lock_fd_tracker();
+                               ret = close(obj->shm_fd);
+                               if (!ret) {
+                                       lttng_ust_delete_fd_from_tracker(obj->shm_fd);
+                               } else {
+                                       PERROR("close");
+                                       assert(0);
+                               }
+                               lttng_ust_unlock_fd_tracker();
+                       } else {
+                               ret = close(obj->shm_fd);
+                               if (ret) {
+                                       PERROR("close");
+                                       assert(0);
+                               }
+                       }
                }
                for (i = 0; i < 2; i++) {
                        if (obj->wait_fd[i] < 0)
                                continue;
-                       ret = close(obj->wait_fd[i]);
-                       if (ret) {
-                               PERROR("close");
-                               assert(0);
+                       if (!consumer) {
+                               lttng_ust_lock_fd_tracker();
+                               ret = close(obj->wait_fd[i]);
+                               if (!ret) {
+                                       lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]);
+                               } else {
+                                       PERROR("close");
+                                       assert(0);
+                               }
+                               lttng_ust_unlock_fd_tracker();
+                       } else {
+                               ret = close(obj->wait_fd[i]);
+                               if (ret) {
+                                       PERROR("close");
+                                       assert(0);
+                               }
                        }
                }
                break;
@@ -432,10 +440,22 @@ void shmp_object_destroy(struct shm_object *obj)
                for (i = 0; i < 2; i++) {
                        if (obj->wait_fd[i] < 0)
                                continue;
-                       ret = close(obj->wait_fd[i]);
-                       if (ret) {
-                               PERROR("close");
-                               assert(0);
+                       if (!consumer) {
+                               lttng_ust_lock_fd_tracker();
+                               ret = close(obj->wait_fd[i]);
+                               if (!ret) {
+                                       lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]);
+                               } else {
+                                       PERROR("close");
+                                       assert(0);
+                               }
+                               lttng_ust_unlock_fd_tracker();
+                       } else {
+                               ret = close(obj->wait_fd[i]);
+                               if (ret) {
+                                       PERROR("close");
+                                       assert(0);
+                               }
                        }
                }
                free(obj->memory_map);
@@ -446,12 +466,12 @@ void shmp_object_destroy(struct shm_object *obj)
        }
 }
 
-void shm_object_table_destroy(struct shm_object_table *table)
+void shm_object_table_destroy(struct shm_object_table *table, int consumer)
 {
        /*
         * Tear down every allocated object (indices 0 to allocated_len - 1)
         * with shmp_object_destroy(), forwarding @consumer unchanged, then
         * free the table itself. @table is dereferenced unconditionally, so
         * it must not be NULL.
         *
         * In the visible parts of shmp_object_destroy(), a zero @consumer
         * makes each close() run under lttng_ust_lock_fd_tracker() and
         * removes the fd from the tracker on success; a non-zero value
         * closes the fd directly.
         */
        int i;
 
        for (i = 0; i < table->allocated_len; i++)
-               shmp_object_destroy(&table->objects[i]);
+               shmp_object_destroy(&table->objects[i], consumer);
        free(table);
 }
 
This page took 0.026396 seconds and 4 git commands to generate.