numa support: allow disabling numa support
[lttng-ust.git] / libringbuffer / shm.c
CommitLineData
1d498196
MD
/*
 * libringbuffer/shm.c
 *
 * Copyright (C) 2005-2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; only
 * version 2.1 of the License.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20
3fbec7dc 21#define _LGPL_SOURCE
bfcda6ce 22#include <config.h>
1d498196
MD
23#include "shm.h"
24#include <unistd.h>
25#include <fcntl.h>
26#include <sys/mman.h>
a9ff648c 27#include <sys/types.h>
1d498196
MD
28#include <sys/stat.h> /* For mode constants */
29#include <fcntl.h> /* For O_* constants */
30#include <assert.h>
8da6cd6d
MD
31#include <stdio.h>
32#include <signal.h>
33#include <dirent.h>
4318ae1b 34#include <lttng/align.h>
96e80018 35#include <limits.h>
bfcda6ce 36#ifdef HAVE_LIBNUMA
4b68c31f 37#include <numa.h>
bfcda6ce 38#endif
3a81f31d 39#include <helper.h>
6548fca4 40#include <ust-fd.h>
3a81f31d
MD
41
/*
 * zero_file - make sure a file really has @len bytes of backing space.
 *
 * Writes @len zero bytes to @fd, starting at the descriptor's current
 * file offset, at most one page per write(2) call. Not doing so can
 * trigger SIGBUS when going beyond the available shm space.
 *
 * A write interrupted by a signal (EINTR) is retried; a short write
 * simply advances and the loop continues with the remainder.
 *
 * Returns 0 on success. On failure returns a negative value:
 *   - the sysconf(3) result if the page size cannot be queried,
 *   - -ENOMEM if the scratch page cannot be allocated,
 *   - -1 if write(2) fails, with errno left as set by write(2) so the
 *     caller can report it.
 */
static
int zero_file(int fd, size_t len)
{
	size_t written = 0;
	char *zeropage;
	long pagelen;
	int ret = 0;
	int saved_errno;

	pagelen = sysconf(_SC_PAGESIZE);
	if (pagelen < 0)
		return (int) pagelen;
	zeropage = calloc(pagelen, 1);
	if (!zeropage)
		return -ENOMEM;

	while (written < len) {
		size_t chunk = len - written;
		ssize_t retlen;

		/* Never hand write(2) more than the single zeroed page. */
		if (chunk > (size_t) pagelen)
			chunk = (size_t) pagelen;
		do {
			retlen = write(fd, zeropage, chunk);
		} while (retlen == -1 && errno == EINTR);
		if (retlen < 0) {
			ret = (int) retlen;
			break;
		}
		written += (size_t) retlen;
	}

	/* Keep the errno from write(2) intact across the cleanup. */
	saved_errno = errno;
	free(zeropage);
	errno = saved_errno;
	return ret;
}
1d498196
MD
79
80struct shm_object_table *shm_object_table_create(size_t max_nb_obj)
81{
82 struct shm_object_table *table;
83
84 table = zmalloc(sizeof(struct shm_object_table) +
85 max_nb_obj * sizeof(table->objects[0]));
74d48abe
MD
86 if (!table)
87 return NULL;
1d498196
MD
88 table->size = max_nb_obj;
89 return table;
90}
91
74d81a6c
MD
92static
93struct shm_object *_shm_object_table_alloc_shm(struct shm_object_table *table,
a9ff648c 94 size_t memory_map_size,
5ea386c3 95 int stream_fd)
1d498196 96{
5ea386c3 97 int shmfd, waitfd[2], ret, i;
1d498196
MD
98 struct shm_object *obj;
99 char *memory_map;
100
5ea386c3
MD
101 if (stream_fd < 0)
102 return NULL;
1d498196
MD
103 if (table->allocated_len >= table->size)
104 return NULL;
7a9c21bd 105 obj = &table->objects[table->allocated_len];
1d498196
MD
106
107 /* wait_fd: create pipe */
108 ret = pipe(waitfd);
109 if (ret < 0) {
110 PERROR("pipe");
111 goto error_pipe;
112 }
113 for (i = 0; i < 2; i++) {
114 ret = fcntl(waitfd[i], F_SETFD, FD_CLOEXEC);
115 if (ret < 0) {
116 PERROR("fcntl");
117 goto error_fcntl;
118 }
119 }
5d61a504
MD
120 /* The write end of the pipe needs to be non-blocking */
121 ret = fcntl(waitfd[1], F_SETFL, O_NONBLOCK);
122 if (ret < 0) {
123 PERROR("fcntl");
124 goto error_fcntl;
125 }
7a9c21bd 126 memcpy(obj->wait_fd, waitfd, sizeof(waitfd));
1d498196 127
5ea386c3 128 /* create shm */
a9ff648c 129
5ea386c3 130 shmfd = stream_fd;
3a81f31d
MD
131 ret = zero_file(shmfd, memory_map_size);
132 if (ret) {
133 PERROR("zero_file");
134 goto error_zero_file;
135 }
1d498196
MD
136 ret = ftruncate(shmfd, memory_map_size);
137 if (ret) {
138 PERROR("ftruncate");
139 goto error_ftruncate;
140 }
d0f6cf57
MD
141 /*
142 * Also ensure the file metadata is synced with the storage by using
143 * fsync(2).
144 */
145 ret = fsync(shmfd);
146 if (ret) {
147 PERROR("fsync");
148 goto error_fsync;
149 }
5ea386c3 150 obj->shm_fd_ownership = 0;
1d498196
MD
151 obj->shm_fd = shmfd;
152
153 /* memory_map: mmap */
154 memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE,
155 MAP_SHARED, shmfd, 0);
156 if (memory_map == MAP_FAILED) {
157 PERROR("mmap");
158 goto error_mmap;
159 }
74d81a6c 160 obj->type = SHM_OBJECT_SHM;
1d498196
MD
161 obj->memory_map = memory_map;
162 obj->memory_map_size = memory_map_size;
163 obj->allocated_len = 0;
dc613eb9 164 obj->index = table->allocated_len++;
7a9c21bd 165
1d498196
MD
166 return obj;
167
168error_mmap:
d0f6cf57 169error_fsync:
1d498196 170error_ftruncate:
3a81f31d 171error_zero_file:
1d498196
MD
172error_fcntl:
173 for (i = 0; i < 2; i++) {
174 ret = close(waitfd[i]);
175 if (ret) {
176 PERROR("close");
177 assert(0);
178 }
179 }
180error_pipe:
1d498196 181 return NULL;
1d498196
MD
182}
183
74d81a6c
MD
184static
185struct shm_object *_shm_object_table_alloc_mem(struct shm_object_table *table,
186 size_t memory_map_size)
187{
188 struct shm_object *obj;
189 void *memory_map;
ff0f5728 190 int waitfd[2], i, ret;
74d81a6c
MD
191
192 if (table->allocated_len >= table->size)
193 return NULL;
194 obj = &table->objects[table->allocated_len];
195
196 memory_map = zmalloc(memory_map_size);
197 if (!memory_map)
198 goto alloc_error;
199
ff0f5728
MD
200 /* wait_fd: create pipe */
201 ret = pipe(waitfd);
202 if (ret < 0) {
203 PERROR("pipe");
204 goto error_pipe;
205 }
206 for (i = 0; i < 2; i++) {
207 ret = fcntl(waitfd[i], F_SETFD, FD_CLOEXEC);
208 if (ret < 0) {
209 PERROR("fcntl");
210 goto error_fcntl;
211 }
212 }
213 /* The write end of the pipe needs to be non-blocking */
214 ret = fcntl(waitfd[1], F_SETFL, O_NONBLOCK);
215 if (ret < 0) {
216 PERROR("fcntl");
217 goto error_fcntl;
218 }
219 memcpy(obj->wait_fd, waitfd, sizeof(waitfd));
220
221 /* no shm_fd */
74d81a6c 222 obj->shm_fd = -1;
5ea386c3 223 obj->shm_fd_ownership = 0;
74d81a6c
MD
224
225 obj->type = SHM_OBJECT_MEM;
226 obj->memory_map = memory_map;
227 obj->memory_map_size = memory_map_size;
228 obj->allocated_len = 0;
229 obj->index = table->allocated_len++;
230
231 return obj;
232
ff0f5728
MD
233error_fcntl:
234 for (i = 0; i < 2; i++) {
235 ret = close(waitfd[i]);
236 if (ret) {
237 PERROR("close");
238 assert(0);
239 }
240 }
241error_pipe:
242 free(memory_map);
74d81a6c
MD
243alloc_error:
244 return NULL;
245}
246
247struct shm_object *shm_object_table_alloc(struct shm_object_table *table,
248 size_t memory_map_size,
a9ff648c 249 enum shm_object_type type,
4b68c31f
MD
250 int stream_fd,
251 int cpu)
74d81a6c 252{
4b68c31f 253 struct shm_object *shm_object;
bfcda6ce
MD
254#ifdef HAVE_LIBNUMA
255 int oldnode, node;
4b68c31f
MD
256
257 oldnode = numa_preferred();
258 if (cpu >= 0) {
259 node = numa_node_of_cpu(cpu);
260 if (node >= 0)
261 numa_set_preferred(node);
262 }
263 if (cpu < 0 || node < 0)
264 numa_set_localalloc();
bfcda6ce 265#endif /* HAVE_LIBNUMA */
74d81a6c
MD
266 switch (type) {
267 case SHM_OBJECT_SHM:
4b68c31f 268 shm_object = _shm_object_table_alloc_shm(table, memory_map_size,
5ea386c3 269 stream_fd);
4b68c31f 270 break;
74d81a6c 271 case SHM_OBJECT_MEM:
4b68c31f
MD
272 shm_object = _shm_object_table_alloc_mem(table, memory_map_size);
273 break;
74d81a6c
MD
274 default:
275 assert(0);
276 }
bfcda6ce 277#ifdef HAVE_LIBNUMA
4b68c31f 278 numa_set_preferred(oldnode);
bfcda6ce 279#endif /* HAVE_LIBNUMA */
4b68c31f 280 return shm_object;
74d81a6c
MD
281}
282
283struct shm_object *shm_object_table_append_shm(struct shm_object_table *table,
284 int shm_fd, int wakeup_fd, uint32_t stream_nr,
285 size_t memory_map_size)
193183fb
MD
286{
287 struct shm_object *obj;
288 char *memory_map;
74d81a6c 289 int ret;
193183fb
MD
290
291 if (table->allocated_len >= table->size)
292 return NULL;
74d81a6c
MD
293 /* streams _must_ be received in sequential order, else fail. */
294 if (stream_nr + 1 != table->allocated_len)
295 return NULL;
296
193183fb
MD
297 obj = &table->objects[table->allocated_len];
298
74d81a6c
MD
299 /* wait_fd: set write end of the pipe. */
300 obj->wait_fd[0] = -1; /* read end is unset */
301 obj->wait_fd[1] = wakeup_fd;
193183fb 302 obj->shm_fd = shm_fd;
5ea386c3 303 obj->shm_fd_ownership = 1;
193183fb 304
74d81a6c
MD
305 ret = fcntl(obj->wait_fd[1], F_SETFD, FD_CLOEXEC);
306 if (ret < 0) {
307 PERROR("fcntl");
308 goto error_fcntl;
309 }
310 /* The write end of the pipe needs to be non-blocking */
311 ret = fcntl(obj->wait_fd[1], F_SETFL, O_NONBLOCK);
312 if (ret < 0) {
313 PERROR("fcntl");
314 goto error_fcntl;
315 }
316
193183fb
MD
317 /* memory_map: mmap */
318 memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE,
319 MAP_SHARED, shm_fd, 0);
320 if (memory_map == MAP_FAILED) {
321 PERROR("mmap");
322 goto error_mmap;
323 }
74d81a6c 324 obj->type = SHM_OBJECT_SHM;
193183fb
MD
325 obj->memory_map = memory_map;
326 obj->memory_map_size = memory_map_size;
327 obj->allocated_len = memory_map_size;
328 obj->index = table->allocated_len++;
329
330 return obj;
331
74d81a6c 332error_fcntl:
193183fb
MD
333error_mmap:
334 return NULL;
335}
336
74d81a6c
MD
337/*
338 * Passing ownership of mem to object.
339 */
340struct shm_object *shm_object_table_append_mem(struct shm_object_table *table,
ff0f5728 341 void *mem, size_t memory_map_size, int wakeup_fd)
74d81a6c
MD
342{
343 struct shm_object *obj;
ff0f5728 344 int ret;
74d81a6c
MD
345
346 if (table->allocated_len >= table->size)
347 return NULL;
348 obj = &table->objects[table->allocated_len];
349
ff0f5728
MD
350 obj->wait_fd[0] = -1; /* read end is unset */
351 obj->wait_fd[1] = wakeup_fd;
74d81a6c 352 obj->shm_fd = -1;
5ea386c3 353 obj->shm_fd_ownership = 0;
74d81a6c 354
ff0f5728
MD
355 ret = fcntl(obj->wait_fd[1], F_SETFD, FD_CLOEXEC);
356 if (ret < 0) {
357 PERROR("fcntl");
358 goto error_fcntl;
359 }
360 /* The write end of the pipe needs to be non-blocking */
361 ret = fcntl(obj->wait_fd[1], F_SETFL, O_NONBLOCK);
362 if (ret < 0) {
363 PERROR("fcntl");
364 goto error_fcntl;
365 }
366
74d81a6c
MD
367 obj->type = SHM_OBJECT_MEM;
368 obj->memory_map = mem;
369 obj->memory_map_size = memory_map_size;
370 obj->allocated_len = memory_map_size;
371 obj->index = table->allocated_len++;
372
373 return obj;
ff0f5728
MD
374
375error_fcntl:
376 return NULL;
74d81a6c
MD
377}
378
1d498196 379static
6548fca4 380void shmp_object_destroy(struct shm_object *obj, int consumer)
1d498196 381{
74d81a6c
MD
382 switch (obj->type) {
383 case SHM_OBJECT_SHM:
384 {
385 int ret, i;
1d498196 386
7a784989
MD
387 ret = munmap(obj->memory_map, obj->memory_map_size);
388 if (ret) {
389 PERROR("umnmap");
390 assert(0);
391 }
6548fca4 392
5ea386c3 393 if (obj->shm_fd_ownership) {
6548fca4
MD
394 /* Delete FDs only if called from app (not consumer). */
395 if (!consumer) {
396 lttng_ust_lock_fd_tracker();
397 ret = close(obj->shm_fd);
398 if (!ret) {
399 lttng_ust_delete_fd_from_tracker(obj->shm_fd);
400 } else {
401 PERROR("close");
402 assert(0);
403 }
404 lttng_ust_unlock_fd_tracker();
405 } else {
406 ret = close(obj->shm_fd);
407 if (ret) {
408 PERROR("close");
409 assert(0);
410 }
a9ff648c
MD
411 }
412 }
74d81a6c
MD
413 for (i = 0; i < 2; i++) {
414 if (obj->wait_fd[i] < 0)
415 continue;
6548fca4
MD
416 if (!consumer) {
417 lttng_ust_lock_fd_tracker();
418 ret = close(obj->wait_fd[i]);
419 if (!ret) {
420 lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]);
421 } else {
422 PERROR("close");
423 assert(0);
424 }
425 lttng_ust_unlock_fd_tracker();
426 } else {
427 ret = close(obj->wait_fd[i]);
428 if (ret) {
429 PERROR("close");
430 assert(0);
431 }
74d81a6c 432 }
1d498196 433 }
74d81a6c
MD
434 break;
435 }
436 case SHM_OBJECT_MEM:
ff0f5728
MD
437 {
438 int ret, i;
439
440 for (i = 0; i < 2; i++) {
441 if (obj->wait_fd[i] < 0)
442 continue;
6548fca4
MD
443 if (!consumer) {
444 lttng_ust_lock_fd_tracker();
445 ret = close(obj->wait_fd[i]);
446 if (!ret) {
447 lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]);
448 } else {
449 PERROR("close");
450 assert(0);
451 }
452 lttng_ust_unlock_fd_tracker();
453 } else {
454 ret = close(obj->wait_fd[i]);
455 if (ret) {
456 PERROR("close");
457 assert(0);
458 }
ff0f5728
MD
459 }
460 }
74d81a6c
MD
461 free(obj->memory_map);
462 break;
ff0f5728 463 }
74d81a6c
MD
464 default:
465 assert(0);
1d498196
MD
466 }
467}
468
6548fca4 469void shm_object_table_destroy(struct shm_object_table *table, int consumer)
1d498196
MD
470{
471 int i;
472
473 for (i = 0; i < table->allocated_len; i++)
6548fca4 474 shmp_object_destroy(&table->objects[i], consumer);
1d498196
MD
475 free(table);
476}
477
478/*
479 * zalloc_shm - allocate memory within a shm object.
480 *
481 * Shared memory is already zeroed by shmget.
482 * *NOT* multithread-safe (should be protected by mutex).
483 * Returns a -1, -1 tuple on error.
484 */
485struct shm_ref zalloc_shm(struct shm_object *obj, size_t len)
486{
487 struct shm_ref ref;
488 struct shm_ref shm_ref_error = { -1, -1 };
489
490 if (obj->memory_map_size - obj->allocated_len < len)
491 return shm_ref_error;
492 ref.index = obj->index;
493 ref.offset = obj->allocated_len;
494 obj->allocated_len += len;
495 return ref;
496}
497
498void align_shm(struct shm_object *obj, size_t align)
499{
500 size_t offset_len = offset_align(obj->allocated_len, align);
501 obj->allocated_len += offset_len;
502}
This page took 0.050596 seconds and 4 git commands to generate.