/*
 * libringbuffer/shm.c
 *
 * Copyright (C) 2005-2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; only
 * version 2.1 of the License.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#define _LGPL_SOURCE
#include "shm.h"
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>	/* For mode constants */
#include <fcntl.h>	/* For O_* constants */
#include <assert.h>
#include <errno.h>	/* For errno, EINTR, ENOMEM, EINVAL, ENOSYS */
#include <stdio.h>
#include <signal.h>
#include <dirent.h>
#include <lttng/align.h>
#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#ifdef HAVE_LIBNUMA
#include <numa.h>
#include <numaif.h>
#endif
#include <helper.h>
#include <ust-fd.h>
#include "mmap.h"

/*
 * Ensure we have the required amount of space available by writing 0
 * into the entire buffer. Not doing so can trigger SIGBUS when going
 * beyond the available shm space.
 */
static
int zero_file(int fd, size_t len)
{
	ssize_t retlen;
	size_t written = 0;
	char *zeropage;
	long pagelen;
	int ret;

	pagelen = sysconf(_SC_PAGESIZE);
	if (pagelen < 0)
		return (int) pagelen;
	zeropage = calloc(pagelen, 1);
	if (!zeropage)
		return -ENOMEM;

	while (len > written) {
		do {
			retlen = write(fd, zeropage,
				min_t(size_t, pagelen, len - written));
		} while (retlen == -1UL && errno == EINTR);
		if (retlen < 0) {
			ret = (int) retlen;
			goto error;
		}
		written += retlen;
	}
	ret = 0;
error:
	free(zeropage);
	return ret;
}

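/*
 * Allocate a table with room for max_nb_obj shm objects. The table is
 * zero-initialized; entries are filled in by the alloc/append helpers
 * below and released by shm_object_table_destroy().
 */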
struct shm_object_table *shm_object_table_create(size_t max_nb_obj)
{
	struct shm_object_table *table;

	table = zmalloc(sizeof(struct shm_object_table) +
			max_nb_obj * sizeof(table->objects[0]));
	if (!table)
		return NULL;
	table->size = max_nb_obj;
	return table;
}

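/*
 * Back the next free table entry with the shared memory file
 * descriptor stream_fd: size it with ftruncate() and zero_file(),
 * create the wakeup pipe, and mmap() the whole area. Returns NULL on
 * error. Ownership of stream_fd stays with the caller
 * (shm_fd_ownership is left at 0).
 */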
static
struct shm_object *_shm_object_table_alloc_shm(struct shm_object_table *table,
			size_t memory_map_size,
			int stream_fd)
{
	int shmfd, waitfd[2], ret, i;
	struct shm_object *obj;
	char *memory_map;

	if (stream_fd < 0)
		return NULL;
	if (table->allocated_len >= table->size)
		return NULL;
	obj = &table->objects[table->allocated_len];

	/* wait_fd: create pipe */
	ret = pipe(waitfd);
	if (ret < 0) {
		PERROR("pipe");
		goto error_pipe;
	}
	for (i = 0; i < 2; i++) {
		ret = fcntl(waitfd[i], F_SETFD, FD_CLOEXEC);
		if (ret < 0) {
			PERROR("fcntl");
			goto error_fcntl;
		}
	}
	/* The write end of the pipe needs to be non-blocking */
	ret = fcntl(waitfd[1], F_SETFL, O_NONBLOCK);
	if (ret < 0) {
		PERROR("fcntl");
		goto error_fcntl;
	}
	memcpy(obj->wait_fd, waitfd, sizeof(waitfd));

	/*
	 * Set POSIX shared memory object size.
	 *
	 * First, use ftruncate() to set its size; some implementations
	 * won't allow writes past the size set by ftruncate().
	 * Then, use write() to fill it with zeros; this allows us to fully
	 * allocate it and detect a shortage of shm space without dealing
	 * with a SIGBUS.
	 */

	shmfd = stream_fd;
	ret = ftruncate(shmfd, memory_map_size);
	if (ret) {
		PERROR("ftruncate");
		goto error_ftruncate;
	}
	ret = zero_file(shmfd, memory_map_size);
	if (ret) {
		PERROR("zero_file");
		goto error_zero_file;
	}

	/*
	 * Also ensure the file metadata is synced with the storage by using
	 * fsync(2). Some platforms don't allow fsync on POSIX shm fds, ignore
	 * EINVAL accordingly.
	 */
	ret = fsync(shmfd);
	if (ret && errno != EINVAL) {
		PERROR("fsync");
		goto error_fsync;
	}
	obj->shm_fd_ownership = 0;
	obj->shm_fd = shmfd;

	/* memory_map: mmap */
	memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE,
			  MAP_SHARED | LTTNG_MAP_POPULATE, shmfd, 0);
	if (memory_map == MAP_FAILED) {
		PERROR("mmap");
		goto error_mmap;
	}
	obj->type = SHM_OBJECT_SHM;
	obj->memory_map = memory_map;
	obj->memory_map_size = memory_map_size;
	obj->allocated_len = 0;
	obj->index = table->allocated_len++;

	return obj;

error_mmap:
error_fsync:
error_ftruncate:
error_zero_file:
error_fcntl:
	for (i = 0; i < 2; i++) {
		ret = close(waitfd[i]);
		if (ret) {
			PERROR("close");
			assert(0);
		}
	}
error_pipe:
	return NULL;
}

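/*
 * Back the next free table entry with plain heap memory instead of a
 * shared memory fd (shm_fd is set to -1). A wakeup pipe is still
 * created. Returns NULL on error.
 */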
static
struct shm_object *_shm_object_table_alloc_mem(struct shm_object_table *table,
			size_t memory_map_size)
{
	struct shm_object *obj;
	void *memory_map;
	int waitfd[2], i, ret;

	if (table->allocated_len >= table->size)
		return NULL;
	obj = &table->objects[table->allocated_len];

	memory_map = zmalloc(memory_map_size);
	if (!memory_map)
		goto alloc_error;

	/* wait_fd: create pipe */
	ret = pipe(waitfd);
	if (ret < 0) {
		PERROR("pipe");
		goto error_pipe;
	}
	for (i = 0; i < 2; i++) {
		ret = fcntl(waitfd[i], F_SETFD, FD_CLOEXEC);
		if (ret < 0) {
			PERROR("fcntl");
			goto error_fcntl;
		}
	}
	/* The write end of the pipe needs to be non-blocking */
	ret = fcntl(waitfd[1], F_SETFL, O_NONBLOCK);
	if (ret < 0) {
		PERROR("fcntl");
		goto error_fcntl;
	}
	memcpy(obj->wait_fd, waitfd, sizeof(waitfd));

	/* no shm_fd */
	obj->shm_fd = -1;
	obj->shm_fd_ownership = 0;

	obj->type = SHM_OBJECT_MEM;
	obj->memory_map = memory_map;
	obj->memory_map_size = memory_map_size;
	obj->allocated_len = 0;
	obj->index = table->allocated_len++;

	return obj;

error_fcntl:
	for (i = 0; i < 2; i++) {
		ret = close(waitfd[i]);
		if (ret) {
			PERROR("close");
			assert(0);
		}
	}
error_pipe:
	free(memory_map);
alloc_error:
	return NULL;
}

/*
 * libnuma prints errors on the console even for numa_available().
 * Work-around this limitation by using get_mempolicy() directly to
 * check whether the kernel supports mempolicy.
 */
#ifdef HAVE_LIBNUMA
static bool lttng_is_numa_available(void)
{
	int ret;

	ret = get_mempolicy(NULL, NULL, 0, NULL, 0);
	if (ret && errno == ENOSYS) {
		return false;
	}
	/* numa_available() returns -1 when NUMA is not available. */
	return numa_available() != -1;
}
#endif

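/*
 * Allocate a shm object of memory_map_size bytes in the next free
 * table entry, dispatching on type (SHM_OBJECT_SHM or SHM_OBJECT_MEM).
 * When libnuma is available, the preferred NUMA node is temporarily
 * set to the node of "cpu" (or to local allocation when cpu < 0) for
 * the duration of the allocation, then restored.
 */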
struct shm_object *shm_object_table_alloc(struct shm_object_table *table,
			size_t memory_map_size,
			enum shm_object_type type,
			int stream_fd,
			int cpu)
{
	struct shm_object *shm_object;
#ifdef HAVE_LIBNUMA
	int oldnode = 0, node;
	bool numa_avail;

	numa_avail = lttng_is_numa_available();
	if (numa_avail) {
		oldnode = numa_preferred();
		if (cpu >= 0) {
			node = numa_node_of_cpu(cpu);
			if (node >= 0)
				numa_set_preferred(node);
		}
		if (cpu < 0 || node < 0)
			numa_set_localalloc();
	}
#endif /* HAVE_LIBNUMA */
	switch (type) {
	case SHM_OBJECT_SHM:
		shm_object = _shm_object_table_alloc_shm(table, memory_map_size,
				stream_fd);
		break;
	case SHM_OBJECT_MEM:
		shm_object = _shm_object_table_alloc_mem(table, memory_map_size);
		break;
	default:
		assert(0);
	}
#ifdef HAVE_LIBNUMA
	if (numa_avail)
		numa_set_preferred(oldnode);
#endif /* HAVE_LIBNUMA */
	return shm_object;
}

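/*
 * Map an already-created shared memory fd, received for stream
 * stream_nr, into the next table entry. Streams must be appended in
 * sequential order. The object takes ownership of shm_fd
 * (shm_fd_ownership = 1); wakeup_fd becomes the write end of the
 * wait_fd pair.
 */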
struct shm_object *shm_object_table_append_shm(struct shm_object_table *table,
		int shm_fd, int wakeup_fd, uint32_t stream_nr,
		size_t memory_map_size)
{
	struct shm_object *obj;
	char *memory_map;
	int ret;

	if (table->allocated_len >= table->size)
		return NULL;
	/* streams _must_ be received in sequential order, else fail. */
	if (stream_nr + 1 != table->allocated_len)
		return NULL;

	obj = &table->objects[table->allocated_len];

	/* wait_fd: set write end of the pipe. */
	obj->wait_fd[0] = -1;	/* read end is unset */
	obj->wait_fd[1] = wakeup_fd;
	obj->shm_fd = shm_fd;
	obj->shm_fd_ownership = 1;

	/* The write end of the pipe needs to be non-blocking */
	ret = fcntl(obj->wait_fd[1], F_SETFL, O_NONBLOCK);
	if (ret < 0) {
		PERROR("fcntl");
		goto error_fcntl;
	}

	/* memory_map: mmap */
	memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE,
			  MAP_SHARED | LTTNG_MAP_POPULATE, shm_fd, 0);
	if (memory_map == MAP_FAILED) {
		PERROR("mmap");
		goto error_mmap;
	}
	obj->type = SHM_OBJECT_SHM;
	obj->memory_map = memory_map;
	obj->memory_map_size = memory_map_size;
	obj->allocated_len = memory_map_size;
	obj->index = table->allocated_len++;

	return obj;

error_fcntl:
error_mmap:
	return NULL;
}

/*
 * Passing ownership of mem to object: the memory is freed by
 * shm_object_table_destroy() when the object is destroyed.
 */
struct shm_object *shm_object_table_append_mem(struct shm_object_table *table,
		void *mem, size_t memory_map_size, int wakeup_fd)
{
	struct shm_object *obj;
	int ret;

	if (table->allocated_len >= table->size)
		return NULL;
	obj = &table->objects[table->allocated_len];

	obj->wait_fd[0] = -1;	/* read end is unset */
	obj->wait_fd[1] = wakeup_fd;
	obj->shm_fd = -1;
	obj->shm_fd_ownership = 0;

	ret = fcntl(obj->wait_fd[1], F_SETFD, FD_CLOEXEC);
	if (ret < 0) {
		PERROR("fcntl");
		goto error_fcntl;
	}
	/* The write end of the pipe needs to be non-blocking */
	ret = fcntl(obj->wait_fd[1], F_SETFL, O_NONBLOCK);
	if (ret < 0) {
		PERROR("fcntl");
		goto error_fcntl;
	}

	obj->type = SHM_OBJECT_MEM;
	obj->memory_map = mem;
	obj->memory_map_size = memory_map_size;
	obj->allocated_len = memory_map_size;
	obj->index = table->allocated_len++;

	return obj;

error_fcntl:
	return NULL;
}

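/*
 * Tear down one shm object: unmap (or free) its memory and close its
 * file descriptors. When called from the application side
 * (consumer == 0), the fds are also removed from the liblttng-ust fd
 * tracker under the tracker lock; the consumer side simply closes them.
 */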
static
void shmp_object_destroy(struct shm_object *obj, int consumer)
{
	switch (obj->type) {
	case SHM_OBJECT_SHM:
	{
		int ret, i;

		ret = munmap(obj->memory_map, obj->memory_map_size);
		if (ret) {
			PERROR("munmap");
			assert(0);
		}

		if (obj->shm_fd_ownership) {
			/* Delete FDs only if called from app (not consumer). */
			if (!consumer) {
				lttng_ust_lock_fd_tracker();
				ret = close(obj->shm_fd);
				if (!ret) {
					lttng_ust_delete_fd_from_tracker(obj->shm_fd);
				} else {
					PERROR("close");
					assert(0);
				}
				lttng_ust_unlock_fd_tracker();
			} else {
				ret = close(obj->shm_fd);
				if (ret) {
					PERROR("close");
					assert(0);
				}
			}
		}
		for (i = 0; i < 2; i++) {
			if (obj->wait_fd[i] < 0)
				continue;
			if (!consumer) {
				lttng_ust_lock_fd_tracker();
				ret = close(obj->wait_fd[i]);
				if (!ret) {
					lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]);
				} else {
					PERROR("close");
					assert(0);
				}
				lttng_ust_unlock_fd_tracker();
			} else {
				ret = close(obj->wait_fd[i]);
				if (ret) {
					PERROR("close");
					assert(0);
				}
			}
		}
		break;
	}
	case SHM_OBJECT_MEM:
	{
		int ret, i;

		for (i = 0; i < 2; i++) {
			if (obj->wait_fd[i] < 0)
				continue;
			if (!consumer) {
				lttng_ust_lock_fd_tracker();
				ret = close(obj->wait_fd[i]);
				if (!ret) {
					lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]);
				} else {
					PERROR("close");
					assert(0);
				}
				lttng_ust_unlock_fd_tracker();
			} else {
				ret = close(obj->wait_fd[i]);
				if (ret) {
					PERROR("close");
					assert(0);
				}
			}
		}
		free(obj->memory_map);
		break;
	}
	default:
		assert(0);
	}
}

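/*
 * Destroy every object in the table, then free the table itself. The
 * consumer flag is forwarded to shmp_object_destroy() to select how
 * file descriptors are closed.
 */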
void shm_object_table_destroy(struct shm_object_table *table, int consumer)
{
	int i;

	for (i = 0; i < table->allocated_len; i++)
		shmp_object_destroy(&table->objects[i], consumer);
	free(table);
}

/*
 * zalloc_shm - allocate memory within a shm object.
 *
 * The memory is already zeroed when the object is created (zero_file()
 * for SHM_OBJECT_SHM, zmalloc() for SHM_OBJECT_MEM), so no zeroing is
 * needed here.
 * *NOT* multithread-safe (should be protected by mutex).
 * Returns a -1, -1 tuple on error.
 */
struct shm_ref zalloc_shm(struct shm_object *obj, size_t len)
{
	struct shm_ref ref;
	struct shm_ref shm_ref_error = { -1, -1 };

	if (obj->memory_map_size - obj->allocated_len < len)
		return shm_ref_error;
	ref.index = obj->index;
	ref.offset = obj->allocated_len;
	obj->allocated_len += len;
	return ref;
}

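/*
 * Advance the allocation offset so that the next zalloc_shm() within
 * the object is aligned on "align" bytes.
 */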
void align_shm(struct shm_object *obj, size_t align)
{
	size_t offset_len = lttng_ust_offset_align(obj->allocated_len, align);
	obj->allocated_len += offset_len;
}
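
/*
 * Illustrative usage sketch (comment only, not compiled): how the
 * helpers above are meant to be combined by a caller. The struct name
 * "my_header" and the 4096-byte size are hypothetical placeholders;
 * the real callers live elsewhere in libringbuffer.
 *
 *	struct shm_object_table *table;
 *	struct shm_object *obj;
 *	struct shm_ref ref;
 *
 *	table = shm_object_table_create(1);
 *	obj = shm_object_table_alloc(table, 4096, SHM_OBJECT_MEM, -1, -1);
 *	align_shm(obj, __alignof__(struct my_header));
 *	ref = zalloc_shm(obj, sizeof(struct my_header));
 *	(check obj for NULL and ref against the { -1, -1 } error value)
 *	...
 *	shm_object_table_destroy(table, 1);
 */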