Rename "tsc" to "timestamp"
[lttng-ust.git] / src / common / ringbuffer / shm.c
CommitLineData
1d498196 1/*
c0c0989a 2 * SPDX-License-Identifier: LGPL-2.1-only
1d498196 3 *
e92f3e28 4 * Copyright (C) 2005-2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
1d498196
MD
5 */
6
3fbec7dc 7#define _LGPL_SOURCE
1d498196
MD
8#include "shm.h"
9#include <unistd.h>
10#include <fcntl.h>
11#include <sys/mman.h>
a9ff648c 12#include <sys/types.h>
1d498196
MD
13#include <sys/stat.h> /* For mode constants */
14#include <fcntl.h> /* For O_* constants */
15#include <assert.h>
8da6cd6d
MD
16#include <stdio.h>
17#include <signal.h>
18#include <dirent.h>
96e80018 19#include <limits.h>
8a208943 20#include <stdbool.h>
fb31eb73 21#include <stdint.h>
3d3a2bb8 22
bfcda6ce 23#ifdef HAVE_LIBNUMA
4b68c31f 24#include <numa.h>
8a208943 25#include <numaif.h>
bfcda6ce 26#endif
3d3a2bb8 27
eae3c729 28#include <lttng/ust-utils.h>
3d3a2bb8 29
9d315d6d
MJ
30#include "common/macros.h"
31#include "common/ust-fd.h"
1be43539 32#include "common/compat/mmap.h"
3a81f31d
MD
33
/*
 * zero_file - write @len zero bytes to @fd.
 *
 * Ensure we have the required amount of space available by writing 0
 * into the entire buffer. Not doing so can trigger SIGBUS when going
 * beyond the available shm space.
 *
 * Data is written starting at the current file offset of @fd, at most
 * one page per write(2) call. A write interrupted by a signal (EINTR)
 * is retried.
 *
 * Returns 0 on success and a negative value on error:
 *   - the (negative) sysconf() result if the page size cannot be read,
 *   - -ENOMEM if the temporary zero page cannot be allocated,
 *   - -errno if write(2) fails,
 *   - -EIO if write(2) reports that no byte was written.
 * On a write failure errno is left set to the reported error so that
 * the caller can print it.
 */
static
int zero_file(int fd, size_t len)
{
	size_t written = 0;
	char *zeropage;
	long pagelen;
	int ret = 0;

	pagelen = sysconf(_SC_PAGESIZE);
	if (pagelen < 0)
		return (int) pagelen;
	zeropage = calloc(pagelen, 1);
	if (!zeropage)
		return -ENOMEM;

	while (written < len) {
		size_t chunk = len - written;
		ssize_t retlen;

		/* Never write more than the one zeroed page we own. */
		if (chunk > (size_t) pagelen)
			chunk = (size_t) pagelen;

		retlen = write(fd, zeropage, chunk);
		if (retlen < 0) {
			if (errno == EINTR)
				continue;
			ret = -errno;
			break;
		}
		if (retlen == 0) {
			/*
			 * No progress was made: bail out rather than
			 * spinning forever on the same request.
			 */
			ret = -EIO;
			break;
		}
		written += (size_t) retlen;
	}

	free(zeropage);
	/* free() is allowed to modify errno: restore it for the caller. */
	if (ret)
		errno = -ret;
	return ret;
}
1d498196 71
97572c04 72struct shm_object_table *shm_object_table_create(size_t max_nb_obj, bool populate)
1d498196
MD
73{
74 struct shm_object_table *table;
75
97572c04
MD
76 table = zmalloc_populate(sizeof(struct shm_object_table) +
77 max_nb_obj * sizeof(table->objects[0]), populate);
74d48abe
MD
78 if (!table)
79 return NULL;
1d498196
MD
80 table->size = max_nb_obj;
81 return table;
82}
83
74d81a6c
MD
84static
85struct shm_object *_shm_object_table_alloc_shm(struct shm_object_table *table,
a9ff648c 86 size_t memory_map_size,
97572c04
MD
87 int stream_fd,
88 bool populate)
1d498196 89{
5ea386c3 90 int shmfd, waitfd[2], ret, i;
97572c04 91 int flags = MAP_SHARED;
1d498196
MD
92 struct shm_object *obj;
93 char *memory_map;
94
5ea386c3
MD
95 if (stream_fd < 0)
96 return NULL;
1d498196
MD
97 if (table->allocated_len >= table->size)
98 return NULL;
7a9c21bd 99 obj = &table->objects[table->allocated_len];
1d498196
MD
100
101 /* wait_fd: create pipe */
8a8c2117 102 ret = pipe2(waitfd, O_CLOEXEC);
1d498196
MD
103 if (ret < 0) {
104 PERROR("pipe");
105 goto error_pipe;
106 }
5d61a504
MD
107 /* The write end of the pipe needs to be non-blocking */
108 ret = fcntl(waitfd[1], F_SETFL, O_NONBLOCK);
109 if (ret < 0) {
110 PERROR("fcntl");
111 goto error_fcntl;
112 }
7a9c21bd 113 memcpy(obj->wait_fd, waitfd, sizeof(waitfd));
1d498196 114
053e6e24
MJ
115 /*
116 * Set POSIX shared memory object size
117 *
118 * First, use ftruncate() to set its size, some implementations won't
119 * allow writes past the size set by ftruncate.
120 * Then, use write() to fill it with zeros, this allows us to fully
121 * allocate it and detect a shortage of shm space without dealing with
122 * a SIGBUS.
123 */
a9ff648c 124
5ea386c3 125 shmfd = stream_fd;
1d498196
MD
126 ret = ftruncate(shmfd, memory_map_size);
127 if (ret) {
128 PERROR("ftruncate");
129 goto error_ftruncate;
130 }
053e6e24
MJ
131 ret = zero_file(shmfd, memory_map_size);
132 if (ret) {
133 PERROR("zero_file");
134 goto error_zero_file;
135 }
71be0c53 136
d0f6cf57
MD
137 /*
138 * Also ensure the file metadata is synced with the storage by using
71be0c53
MJ
139 * fsync(2). Some platforms don't allow fsync on POSIX shm fds, ignore
140 * EINVAL accordingly.
d0f6cf57
MD
141 */
142 ret = fsync(shmfd);
71be0c53 143 if (ret && errno != EINVAL) {
d0f6cf57
MD
144 PERROR("fsync");
145 goto error_fsync;
146 }
5ea386c3 147 obj->shm_fd_ownership = 0;
1d498196
MD
148 obj->shm_fd = shmfd;
149
97572c04
MD
150 if (populate)
151 flags |= LTTNG_MAP_POPULATE;
1d498196
MD
152 /* memory_map: mmap */
153 memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE,
97572c04 154 flags, shmfd, 0);
1d498196
MD
155 if (memory_map == MAP_FAILED) {
156 PERROR("mmap");
157 goto error_mmap;
158 }
74d81a6c 159 obj->type = SHM_OBJECT_SHM;
1d498196
MD
160 obj->memory_map = memory_map;
161 obj->memory_map_size = memory_map_size;
162 obj->allocated_len = 0;
dc613eb9 163 obj->index = table->allocated_len++;
7a9c21bd 164
1d498196
MD
165 return obj;
166
167error_mmap:
d0f6cf57 168error_fsync:
1d498196 169error_ftruncate:
3a81f31d 170error_zero_file:
1d498196
MD
171error_fcntl:
172 for (i = 0; i < 2; i++) {
173 ret = close(waitfd[i]);
174 if (ret) {
175 PERROR("close");
176 assert(0);
177 }
178 }
179error_pipe:
1d498196 180 return NULL;
1d498196
MD
181}
182
74d81a6c
MD
183static
184struct shm_object *_shm_object_table_alloc_mem(struct shm_object_table *table,
97572c04 185 size_t memory_map_size, bool populate)
74d81a6c
MD
186{
187 struct shm_object *obj;
188 void *memory_map;
ff0f5728 189 int waitfd[2], i, ret;
74d81a6c
MD
190
191 if (table->allocated_len >= table->size)
192 return NULL;
193 obj = &table->objects[table->allocated_len];
194
97572c04 195 memory_map = zmalloc_populate(memory_map_size, populate);
74d81a6c
MD
196 if (!memory_map)
197 goto alloc_error;
198
ff0f5728 199 /* wait_fd: create pipe */
8a8c2117 200 ret = pipe2(waitfd, O_CLOEXEC);
ff0f5728
MD
201 if (ret < 0) {
202 PERROR("pipe");
203 goto error_pipe;
204 }
ff0f5728
MD
205 /* The write end of the pipe needs to be non-blocking */
206 ret = fcntl(waitfd[1], F_SETFL, O_NONBLOCK);
207 if (ret < 0) {
208 PERROR("fcntl");
209 goto error_fcntl;
210 }
211 memcpy(obj->wait_fd, waitfd, sizeof(waitfd));
212
213 /* no shm_fd */
74d81a6c 214 obj->shm_fd = -1;
5ea386c3 215 obj->shm_fd_ownership = 0;
74d81a6c
MD
216
217 obj->type = SHM_OBJECT_MEM;
218 obj->memory_map = memory_map;
219 obj->memory_map_size = memory_map_size;
220 obj->allocated_len = 0;
221 obj->index = table->allocated_len++;
222
223 return obj;
224
ff0f5728
MD
225error_fcntl:
226 for (i = 0; i < 2; i++) {
227 ret = close(waitfd[i]);
228 if (ret) {
229 PERROR("close");
230 assert(0);
231 }
232 }
233error_pipe:
234 free(memory_map);
74d81a6c
MD
235alloc_error:
236 return NULL;
237}
238
8a208943
MD
/*
 * libnuma prints errors on the console even for numa_available().
 * Work-around this limitation by using get_mempolicy() directly to
 * check whether the kernel supports mempolicy.
 *
 * Returns false when get_mempolicy() fails with ENOSYS or when
 * numa_available() reports that the NUMA API cannot be used, true
 * otherwise.
 */
#ifdef HAVE_LIBNUMA
static bool lttng_is_numa_available(void)
{
	int ret;

	ret = get_mempolicy(NULL, NULL, 0, NULL, 0);
	if (ret && errno == ENOSYS) {
		return false;
	}
	/*
	 * numa_available() returns -1 when the NUMA API is unusable.
	 * Its success value is not positive, so testing for "> 0"
	 * would report NUMA as unavailable on every system.
	 */
	return numa_available() != -1;
}
#endif
256
c494c0f1 257#ifdef HAVE_LIBNUMA
74d81a6c
MD
258struct shm_object *shm_object_table_alloc(struct shm_object_table *table,
259 size_t memory_map_size,
a9ff648c 260 enum shm_object_type type,
4b68c31f 261 int stream_fd,
97572c04
MD
262 int cpu,
263 bool populate)
c494c0f1
MJ
264#else
265struct shm_object *shm_object_table_alloc(struct shm_object_table *table,
266 size_t memory_map_size,
267 enum shm_object_type type,
268 int stream_fd,
97572c04
MD
269 int cpu __attribute__((unused)),
270 bool populate)
c494c0f1 271#endif
74d81a6c 272{
4b68c31f 273 struct shm_object *shm_object;
bfcda6ce 274#ifdef HAVE_LIBNUMA
8a208943
MD
275 int oldnode = 0, node;
276 bool numa_avail;
4b68c31f 277
8a208943
MD
278 numa_avail = lttng_is_numa_available();
279 if (numa_avail) {
280 oldnode = numa_preferred();
281 if (cpu >= 0) {
282 node = numa_node_of_cpu(cpu);
283 if (node >= 0)
284 numa_set_preferred(node);
285 }
286 if (cpu < 0 || node < 0)
287 numa_set_localalloc();
4b68c31f 288 }
bfcda6ce 289#endif /* HAVE_LIBNUMA */
74d81a6c
MD
290 switch (type) {
291 case SHM_OBJECT_SHM:
4b68c31f 292 shm_object = _shm_object_table_alloc_shm(table, memory_map_size,
97572c04 293 stream_fd, populate);
4b68c31f 294 break;
74d81a6c 295 case SHM_OBJECT_MEM:
97572c04
MD
296 shm_object = _shm_object_table_alloc_mem(table, memory_map_size,
297 populate);
4b68c31f 298 break;
74d81a6c
MD
299 default:
300 assert(0);
301 }
bfcda6ce 302#ifdef HAVE_LIBNUMA
8a208943
MD
303 if (numa_avail)
304 numa_set_preferred(oldnode);
bfcda6ce 305#endif /* HAVE_LIBNUMA */
4b68c31f 306 return shm_object;
74d81a6c
MD
307}
308
309struct shm_object *shm_object_table_append_shm(struct shm_object_table *table,
310 int shm_fd, int wakeup_fd, uint32_t stream_nr,
97572c04 311 size_t memory_map_size, bool populate)
193183fb 312{
97572c04 313 int flags = MAP_SHARED;
193183fb
MD
314 struct shm_object *obj;
315 char *memory_map;
74d81a6c 316 int ret;
193183fb
MD
317
318 if (table->allocated_len >= table->size)
319 return NULL;
74d81a6c
MD
320 /* streams _must_ be received in sequential order, else fail. */
321 if (stream_nr + 1 != table->allocated_len)
322 return NULL;
323
193183fb
MD
324 obj = &table->objects[table->allocated_len];
325
74d81a6c
MD
326 /* wait_fd: set write end of the pipe. */
327 obj->wait_fd[0] = -1; /* read end is unset */
328 obj->wait_fd[1] = wakeup_fd;
193183fb 329 obj->shm_fd = shm_fd;
5ea386c3 330 obj->shm_fd_ownership = 1;
193183fb 331
74d81a6c
MD
332 /* The write end of the pipe needs to be non-blocking */
333 ret = fcntl(obj->wait_fd[1], F_SETFL, O_NONBLOCK);
334 if (ret < 0) {
335 PERROR("fcntl");
336 goto error_fcntl;
337 }
338
97572c04
MD
339 if (populate)
340 flags |= LTTNG_MAP_POPULATE;
193183fb
MD
341 /* memory_map: mmap */
342 memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE,
97572c04 343 flags, shm_fd, 0);
193183fb
MD
344 if (memory_map == MAP_FAILED) {
345 PERROR("mmap");
346 goto error_mmap;
347 }
74d81a6c 348 obj->type = SHM_OBJECT_SHM;
193183fb
MD
349 obj->memory_map = memory_map;
350 obj->memory_map_size = memory_map_size;
351 obj->allocated_len = memory_map_size;
352 obj->index = table->allocated_len++;
353
354 return obj;
355
74d81a6c 356error_fcntl:
193183fb
MD
357error_mmap:
358 return NULL;
359}
360
74d81a6c
MD
361/*
362 * Passing ownership of mem to object.
363 */
364struct shm_object *shm_object_table_append_mem(struct shm_object_table *table,
ff0f5728 365 void *mem, size_t memory_map_size, int wakeup_fd)
74d81a6c
MD
366{
367 struct shm_object *obj;
ff0f5728 368 int ret;
74d81a6c
MD
369
370 if (table->allocated_len >= table->size)
371 return NULL;
372 obj = &table->objects[table->allocated_len];
373
ff0f5728
MD
374 obj->wait_fd[0] = -1; /* read end is unset */
375 obj->wait_fd[1] = wakeup_fd;
74d81a6c 376 obj->shm_fd = -1;
5ea386c3 377 obj->shm_fd_ownership = 0;
74d81a6c 378
ff0f5728
MD
379 /* The write end of the pipe needs to be non-blocking */
380 ret = fcntl(obj->wait_fd[1], F_SETFL, O_NONBLOCK);
381 if (ret < 0) {
382 PERROR("fcntl");
383 goto error_fcntl;
384 }
385
74d81a6c
MD
386 obj->type = SHM_OBJECT_MEM;
387 obj->memory_map = mem;
388 obj->memory_map_size = memory_map_size;
389 obj->allocated_len = memory_map_size;
390 obj->index = table->allocated_len++;
391
392 return obj;
ff0f5728
MD
393
394error_fcntl:
395 return NULL;
74d81a6c
MD
396}
397
1d498196 398static
6548fca4 399void shmp_object_destroy(struct shm_object *obj, int consumer)
1d498196 400{
74d81a6c
MD
401 switch (obj->type) {
402 case SHM_OBJECT_SHM:
403 {
404 int ret, i;
1d498196 405
7a784989
MD
406 ret = munmap(obj->memory_map, obj->memory_map_size);
407 if (ret) {
408 PERROR("umnmap");
409 assert(0);
410 }
6548fca4 411
5ea386c3 412 if (obj->shm_fd_ownership) {
6548fca4
MD
413 /* Delete FDs only if called from app (not consumer). */
414 if (!consumer) {
415 lttng_ust_lock_fd_tracker();
416 ret = close(obj->shm_fd);
417 if (!ret) {
418 lttng_ust_delete_fd_from_tracker(obj->shm_fd);
419 } else {
420 PERROR("close");
421 assert(0);
422 }
423 lttng_ust_unlock_fd_tracker();
424 } else {
425 ret = close(obj->shm_fd);
426 if (ret) {
427 PERROR("close");
428 assert(0);
429 }
a9ff648c
MD
430 }
431 }
74d81a6c
MD
432 for (i = 0; i < 2; i++) {
433 if (obj->wait_fd[i] < 0)
434 continue;
6548fca4
MD
435 if (!consumer) {
436 lttng_ust_lock_fd_tracker();
437 ret = close(obj->wait_fd[i]);
438 if (!ret) {
439 lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]);
440 } else {
441 PERROR("close");
442 assert(0);
443 }
444 lttng_ust_unlock_fd_tracker();
445 } else {
446 ret = close(obj->wait_fd[i]);
447 if (ret) {
448 PERROR("close");
449 assert(0);
450 }
74d81a6c 451 }
1d498196 452 }
74d81a6c
MD
453 break;
454 }
455 case SHM_OBJECT_MEM:
ff0f5728
MD
456 {
457 int ret, i;
458
459 for (i = 0; i < 2; i++) {
460 if (obj->wait_fd[i] < 0)
461 continue;
6548fca4
MD
462 if (!consumer) {
463 lttng_ust_lock_fd_tracker();
464 ret = close(obj->wait_fd[i]);
465 if (!ret) {
466 lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]);
467 } else {
468 PERROR("close");
469 assert(0);
470 }
471 lttng_ust_unlock_fd_tracker();
472 } else {
473 ret = close(obj->wait_fd[i]);
474 if (ret) {
475 PERROR("close");
476 assert(0);
477 }
ff0f5728
MD
478 }
479 }
74d81a6c
MD
480 free(obj->memory_map);
481 break;
ff0f5728 482 }
74d81a6c
MD
483 default:
484 assert(0);
1d498196
MD
485 }
486}
487
6548fca4 488void shm_object_table_destroy(struct shm_object_table *table, int consumer)
1d498196
MD
489{
490 int i;
491
492 for (i = 0; i < table->allocated_len; i++)
6548fca4 493 shmp_object_destroy(&table->objects[i], consumer);
1d498196
MD
494 free(table);
495}
496
497/*
498 * zalloc_shm - allocate memory within a shm object.
499 *
500 * Shared memory is already zeroed by shmget.
501 * *NOT* multithread-safe (should be protected by mutex).
502 * Returns a -1, -1 tuple on error.
503 */
504struct shm_ref zalloc_shm(struct shm_object *obj, size_t len)
505{
506 struct shm_ref ref;
507 struct shm_ref shm_ref_error = { -1, -1 };
508
509 if (obj->memory_map_size - obj->allocated_len < len)
510 return shm_ref_error;
511 ref.index = obj->index;
512 ref.offset = obj->allocated_len;
513 obj->allocated_len += len;
514 return ref;
515}
516
517void align_shm(struct shm_object *obj, size_t align)
518{
b72687b8 519 size_t offset_len = lttng_ust_offset_align(obj->allocated_len, align);
1d498196
MD
520 obj->allocated_len += offset_len;
521}
This page took 0.064659 seconds and 4 git commands to generate.