Fix: set FD_CLOEXEC on incoming FDs.
[lttng-ust.git] / libringbuffer / shm.c
1 /*
2 * libringbuffer/shm.c
3 *
4 * Copyright (C) 2005-2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; only
9 * version 2.1 of the License.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #define _LGPL_SOURCE
22 #include <config.h>
23 #include "shm.h"
24 #include <unistd.h>
25 #include <fcntl.h>
26 #include <sys/mman.h>
27 #include <sys/types.h>
28 #include <sys/stat.h> /* For mode constants */
29 #include <fcntl.h> /* For O_* constants */
30 #include <assert.h>
31 #include <stdio.h>
32 #include <signal.h>
33 #include <dirent.h>
34 #include <lttng/align.h>
35 #include <limits.h>
36 #include <stdbool.h>
37 #ifdef HAVE_LIBNUMA
38 #include <numa.h>
39 #include <numaif.h>
40 #endif
41 #include <helper.h>
42 #include <ust-fd.h>
43 #include "mmap.h"
44
/*
 * Ensure we have the required amount of space available by writing 0
 * into the entire buffer. Not doing so can trigger SIGBUS when going
 * beyond the available shm space.
 *
 * Returns 0 on success, a negative errno-style value on error.
 */
static
int zero_file(int fd, size_t len)
{
	ssize_t retlen;
	size_t written = 0;
	char *zeropage;
	long pagelen;
	int ret;

	pagelen = sysconf(_SC_PAGESIZE);
	if (pagelen < 0)
		return (int) pagelen;
	zeropage = calloc(pagelen, 1);
	if (!zeropage)
		return -ENOMEM;

	while (len > written) {
		/* Write at most one zeroed page per iteration. */
		size_t chunk = len - written;

		if (chunk > (size_t) pagelen)
			chunk = (size_t) pagelen;
		do {
			retlen = write(fd, zeropage, chunk);
		} while (retlen < 0 && errno == EINTR);	/* restart on signal */
		if (retlen < 0) {
			/* Propagate errno, consistent with -ENOMEM above. */
			ret = -errno;
			goto error;
		}
		written += retlen;
	}
	ret = 0;
error:
	free(zeropage);
	return ret;
}
82
83 struct shm_object_table *shm_object_table_create(size_t max_nb_obj)
84 {
85 struct shm_object_table *table;
86
87 table = zmalloc(sizeof(struct shm_object_table) +
88 max_nb_obj * sizeof(table->objects[0]));
89 if (!table)
90 return NULL;
91 table->size = max_nb_obj;
92 return table;
93 }
94
/*
 * Allocate the next slot of @table as a shm-backed object of
 * @memory_map_size bytes, on top of the caller-provided @stream_fd.
 * A wakeup pipe is created for the object. The stream_fd remains
 * owned by the caller (shm_fd_ownership = 0).
 *
 * Returns the object on success, NULL on error (table full, bad fd,
 * or any resource-setup failure).
 */
static
struct shm_object *_shm_object_table_alloc_shm(struct shm_object_table *table,
			size_t memory_map_size,
			int stream_fd)
{
	int shmfd, waitfd[2], ret, i;
	struct shm_object *obj;
	char *memory_map;

	if (stream_fd < 0)
		return NULL;
	if (table->allocated_len >= table->size)
		return NULL;
	obj = &table->objects[table->allocated_len];

	/* wait_fd: create pipe */
	ret = pipe(waitfd);
	if (ret < 0) {
		PERROR("pipe");
		goto error_pipe;
	}
	/* Keep both pipe ends from leaking across exec. */
	for (i = 0; i < 2; i++) {
		ret = fcntl(waitfd[i], F_SETFD, FD_CLOEXEC);
		if (ret < 0) {
			PERROR("fcntl");
			goto error_fcntl;
		}
	}
	/* The write end of the pipe needs to be non-blocking */
	ret = fcntl(waitfd[1], F_SETFL, O_NONBLOCK);
	if (ret < 0) {
		PERROR("fcntl");
		goto error_fcntl;
	}
	memcpy(obj->wait_fd, waitfd, sizeof(waitfd));

	/* create shm */

	shmfd = stream_fd;
	/*
	 * Pre-write zeros over the whole range so later page faults
	 * cannot SIGBUS on insufficient backing space (see zero_file()).
	 */
	ret = zero_file(shmfd, memory_map_size);
	if (ret) {
		PERROR("zero_file");
		goto error_zero_file;
	}
	ret = ftruncate(shmfd, memory_map_size);
	if (ret) {
		PERROR("ftruncate");
		goto error_ftruncate;
	}
	/*
	 * Also ensure the file metadata is synced with the storage by using
	 * fsync(2).
	 */
	ret = fsync(shmfd);
	if (ret) {
		PERROR("fsync");
		goto error_fsync;
	}
	/* stream_fd stays owned by the caller: do not close it on destroy. */
	obj->shm_fd_ownership = 0;
	obj->shm_fd = shmfd;

	/* memory_map: mmap */
	memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE,
			MAP_SHARED | LTTNG_MAP_POPULATE, shmfd, 0);
	if (memory_map == MAP_FAILED) {
		PERROR("mmap");
		goto error_mmap;
	}
	obj->type = SHM_OBJECT_SHM;
	obj->memory_map = memory_map;
	obj->memory_map_size = memory_map_size;
	obj->allocated_len = 0;
	obj->index = table->allocated_len++;

	return obj;

error_mmap:
error_fsync:
error_ftruncate:
error_zero_file:
error_fcntl:
	/* Only the pipe fds are ours to close; stream_fd belongs to caller. */
	for (i = 0; i < 2; i++) {
		ret = close(waitfd[i]);
		if (ret) {
			PERROR("close");
			assert(0);
		}
	}
error_pipe:
	return NULL;
}
186
187 static
188 struct shm_object *_shm_object_table_alloc_mem(struct shm_object_table *table,
189 size_t memory_map_size)
190 {
191 struct shm_object *obj;
192 void *memory_map;
193 int waitfd[2], i, ret;
194
195 if (table->allocated_len >= table->size)
196 return NULL;
197 obj = &table->objects[table->allocated_len];
198
199 memory_map = zmalloc(memory_map_size);
200 if (!memory_map)
201 goto alloc_error;
202
203 /* wait_fd: create pipe */
204 ret = pipe(waitfd);
205 if (ret < 0) {
206 PERROR("pipe");
207 goto error_pipe;
208 }
209 for (i = 0; i < 2; i++) {
210 ret = fcntl(waitfd[i], F_SETFD, FD_CLOEXEC);
211 if (ret < 0) {
212 PERROR("fcntl");
213 goto error_fcntl;
214 }
215 }
216 /* The write end of the pipe needs to be non-blocking */
217 ret = fcntl(waitfd[1], F_SETFL, O_NONBLOCK);
218 if (ret < 0) {
219 PERROR("fcntl");
220 goto error_fcntl;
221 }
222 memcpy(obj->wait_fd, waitfd, sizeof(waitfd));
223
224 /* no shm_fd */
225 obj->shm_fd = -1;
226 obj->shm_fd_ownership = 0;
227
228 obj->type = SHM_OBJECT_MEM;
229 obj->memory_map = memory_map;
230 obj->memory_map_size = memory_map_size;
231 obj->allocated_len = 0;
232 obj->index = table->allocated_len++;
233
234 return obj;
235
236 error_fcntl:
237 for (i = 0; i < 2; i++) {
238 ret = close(waitfd[i]);
239 if (ret) {
240 PERROR("close");
241 assert(0);
242 }
243 }
244 error_pipe:
245 free(memory_map);
246 alloc_error:
247 return NULL;
248 }
249
/*
 * libnuma prints errors on the console even for numa_available().
 * Work-around this limitation by using get_mempolicy() directly to
 * check whether the kernel supports mempolicy.
 */
#ifdef HAVE_LIBNUMA
static bool lttng_is_numa_available(void)
{
	/* ENOSYS from get_mempolicy() means no kernel mempolicy support. */
	if (get_mempolicy(NULL, NULL, 0, NULL, 0) && errno == ENOSYS)
		return false;
	return numa_available() > 0;
}
#endif
267
268 struct shm_object *shm_object_table_alloc(struct shm_object_table *table,
269 size_t memory_map_size,
270 enum shm_object_type type,
271 int stream_fd,
272 int cpu)
273 {
274 struct shm_object *shm_object;
275 #ifdef HAVE_LIBNUMA
276 int oldnode = 0, node;
277 bool numa_avail;
278
279 numa_avail = lttng_is_numa_available();
280 if (numa_avail) {
281 oldnode = numa_preferred();
282 if (cpu >= 0) {
283 node = numa_node_of_cpu(cpu);
284 if (node >= 0)
285 numa_set_preferred(node);
286 }
287 if (cpu < 0 || node < 0)
288 numa_set_localalloc();
289 }
290 #endif /* HAVE_LIBNUMA */
291 switch (type) {
292 case SHM_OBJECT_SHM:
293 shm_object = _shm_object_table_alloc_shm(table, memory_map_size,
294 stream_fd);
295 break;
296 case SHM_OBJECT_MEM:
297 shm_object = _shm_object_table_alloc_mem(table, memory_map_size);
298 break;
299 default:
300 assert(0);
301 }
302 #ifdef HAVE_LIBNUMA
303 if (numa_avail)
304 numa_set_preferred(oldnode);
305 #endif /* HAVE_LIBNUMA */
306 return shm_object;
307 }
308
309 struct shm_object *shm_object_table_append_shm(struct shm_object_table *table,
310 int shm_fd, int wakeup_fd, uint32_t stream_nr,
311 size_t memory_map_size)
312 {
313 struct shm_object *obj;
314 char *memory_map;
315 int ret;
316
317 if (table->allocated_len >= table->size)
318 return NULL;
319 /* streams _must_ be received in sequential order, else fail. */
320 if (stream_nr + 1 != table->allocated_len)
321 return NULL;
322
323 obj = &table->objects[table->allocated_len];
324
325 /* wait_fd: set write end of the pipe. */
326 obj->wait_fd[0] = -1; /* read end is unset */
327 obj->wait_fd[1] = wakeup_fd;
328 obj->shm_fd = shm_fd;
329 obj->shm_fd_ownership = 1;
330
331 /* The write end of the pipe needs to be non-blocking */
332 ret = fcntl(obj->wait_fd[1], F_SETFL, O_NONBLOCK);
333 if (ret < 0) {
334 PERROR("fcntl");
335 goto error_fcntl;
336 }
337
338 /* memory_map: mmap */
339 memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE,
340 MAP_SHARED | LTTNG_MAP_POPULATE, shm_fd, 0);
341 if (memory_map == MAP_FAILED) {
342 PERROR("mmap");
343 goto error_mmap;
344 }
345 obj->type = SHM_OBJECT_SHM;
346 obj->memory_map = memory_map;
347 obj->memory_map_size = memory_map_size;
348 obj->allocated_len = memory_map_size;
349 obj->index = table->allocated_len++;
350
351 return obj;
352
353 error_fcntl:
354 error_mmap:
355 return NULL;
356 }
357
358 /*
359 * Passing ownership of mem to object.
360 */
361 struct shm_object *shm_object_table_append_mem(struct shm_object_table *table,
362 void *mem, size_t memory_map_size, int wakeup_fd)
363 {
364 struct shm_object *obj;
365 int ret;
366
367 if (table->allocated_len >= table->size)
368 return NULL;
369 obj = &table->objects[table->allocated_len];
370
371 obj->wait_fd[0] = -1; /* read end is unset */
372 obj->wait_fd[1] = wakeup_fd;
373 obj->shm_fd = -1;
374 obj->shm_fd_ownership = 0;
375
376 ret = fcntl(obj->wait_fd[1], F_SETFD, FD_CLOEXEC);
377 if (ret < 0) {
378 PERROR("fcntl");
379 goto error_fcntl;
380 }
381 /* The write end of the pipe needs to be non-blocking */
382 ret = fcntl(obj->wait_fd[1], F_SETFL, O_NONBLOCK);
383 if (ret < 0) {
384 PERROR("fcntl");
385 goto error_fcntl;
386 }
387
388 obj->type = SHM_OBJECT_MEM;
389 obj->memory_map = mem;
390 obj->memory_map_size = memory_map_size;
391 obj->allocated_len = memory_map_size;
392 obj->index = table->allocated_len++;
393
394 return obj;
395
396 error_fcntl:
397 return NULL;
398 }
399
400 static
401 void shmp_object_destroy(struct shm_object *obj, int consumer)
402 {
403 switch (obj->type) {
404 case SHM_OBJECT_SHM:
405 {
406 int ret, i;
407
408 ret = munmap(obj->memory_map, obj->memory_map_size);
409 if (ret) {
410 PERROR("umnmap");
411 assert(0);
412 }
413
414 if (obj->shm_fd_ownership) {
415 /* Delete FDs only if called from app (not consumer). */
416 if (!consumer) {
417 lttng_ust_lock_fd_tracker();
418 ret = close(obj->shm_fd);
419 if (!ret) {
420 lttng_ust_delete_fd_from_tracker(obj->shm_fd);
421 } else {
422 PERROR("close");
423 assert(0);
424 }
425 lttng_ust_unlock_fd_tracker();
426 } else {
427 ret = close(obj->shm_fd);
428 if (ret) {
429 PERROR("close");
430 assert(0);
431 }
432 }
433 }
434 for (i = 0; i < 2; i++) {
435 if (obj->wait_fd[i] < 0)
436 continue;
437 if (!consumer) {
438 lttng_ust_lock_fd_tracker();
439 ret = close(obj->wait_fd[i]);
440 if (!ret) {
441 lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]);
442 } else {
443 PERROR("close");
444 assert(0);
445 }
446 lttng_ust_unlock_fd_tracker();
447 } else {
448 ret = close(obj->wait_fd[i]);
449 if (ret) {
450 PERROR("close");
451 assert(0);
452 }
453 }
454 }
455 break;
456 }
457 case SHM_OBJECT_MEM:
458 {
459 int ret, i;
460
461 for (i = 0; i < 2; i++) {
462 if (obj->wait_fd[i] < 0)
463 continue;
464 if (!consumer) {
465 lttng_ust_lock_fd_tracker();
466 ret = close(obj->wait_fd[i]);
467 if (!ret) {
468 lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]);
469 } else {
470 PERROR("close");
471 assert(0);
472 }
473 lttng_ust_unlock_fd_tracker();
474 } else {
475 ret = close(obj->wait_fd[i]);
476 if (ret) {
477 PERROR("close");
478 assert(0);
479 }
480 }
481 }
482 free(obj->memory_map);
483 break;
484 }
485 default:
486 assert(0);
487 }
488 }
489
490 void shm_object_table_destroy(struct shm_object_table *table, int consumer)
491 {
492 int i;
493
494 for (i = 0; i < table->allocated_len; i++)
495 shmp_object_destroy(&table->objects[i], consumer);
496 free(table);
497 }
498
499 /*
500 * zalloc_shm - allocate memory within a shm object.
501 *
502 * Shared memory is already zeroed by shmget.
503 * *NOT* multithread-safe (should be protected by mutex).
504 * Returns a -1, -1 tuple on error.
505 */
506 struct shm_ref zalloc_shm(struct shm_object *obj, size_t len)
507 {
508 struct shm_ref ref;
509 struct shm_ref shm_ref_error = { -1, -1 };
510
511 if (obj->memory_map_size - obj->allocated_len < len)
512 return shm_ref_error;
513 ref.index = obj->index;
514 ref.offset = obj->allocated_len;
515 obj->allocated_len += len;
516 return ref;
517 }
518
519 void align_shm(struct shm_object *obj, size_t align)
520 {
521 size_t offset_len = offset_align(obj->allocated_len, align);
522 obj->allocated_len += offset_len;
523 }
This page took 0.041342 seconds and 4 git commands to generate.