numa support: allow disabling numa support
[lttng-ust.git] / libringbuffer / shm.c
/*
 * libringbuffer/shm.c
 *
 * Copyright (C) 2005-2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; only
 * version 2.1 of the License.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#define _LGPL_SOURCE
#include <config.h>
#include "shm.h"
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>	/* For mode constants */
#include <fcntl.h>	/* For O_* constants */
#include <assert.h>
#include <errno.h>	/* For errno and ENOMEM */
#include <stdio.h>
#include <signal.h>
#include <dirent.h>
#include <lttng/align.h>
#include <limits.h>
#ifdef HAVE_LIBNUMA
#include <numa.h>
#endif
#include <helper.h>
#include <ust-fd.h>

/*
 * Ensure we have the required amount of space available by writing 0
 * into the entire buffer. Not doing so can trigger SIGBUS when going
 * beyond the available shm space.
 */
static
int zero_file(int fd, size_t len)
{
	ssize_t retlen;
	size_t written = 0;
	char *zeropage;
	long pagelen;
	int ret;

	pagelen = sysconf(_SC_PAGESIZE);
	if (pagelen < 0)
		return (int) pagelen;
	zeropage = calloc(pagelen, 1);
	if (!zeropage)
		return -ENOMEM;

	while (len > written) {
		do {
			retlen = write(fd, zeropage,
				min_t(size_t, pagelen, len - written));
		} while (retlen == -1UL && errno == EINTR);
		if (retlen < 0) {
			ret = (int) retlen;
			goto error;
		}
		written += retlen;
	}
	ret = 0;
error:
	free(zeropage);
	return ret;
}

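/*
 * Allocate a zeroed object table with room for max_nb_obj shm objects.
 */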
struct shm_object_table *shm_object_table_create(size_t max_nb_obj)
{
	struct shm_object_table *table;

	table = zmalloc(sizeof(struct shm_object_table) +
			max_nb_obj * sizeof(table->objects[0]));
	if (!table)
		return NULL;
	table->size = max_nb_obj;
	return table;
}

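/*
 * Allocate a shm-backed object within the table: zero-fill and
 * ftruncate the file referred to by stream_fd to memory_map_size,
 * mmap it, and create the wait_fd pipe. The caller keeps ownership
 * of stream_fd (shm_fd_ownership stays 0).
 */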
static
struct shm_object *_shm_object_table_alloc_shm(struct shm_object_table *table,
			size_t memory_map_size,
			int stream_fd)
{
	int shmfd, waitfd[2], ret, i;
	struct shm_object *obj;
	char *memory_map;

	if (stream_fd < 0)
		return NULL;
	if (table->allocated_len >= table->size)
		return NULL;
	obj = &table->objects[table->allocated_len];

	/* wait_fd: create pipe */
	ret = pipe(waitfd);
	if (ret < 0) {
		PERROR("pipe");
		goto error_pipe;
	}
	for (i = 0; i < 2; i++) {
		ret = fcntl(waitfd[i], F_SETFD, FD_CLOEXEC);
		if (ret < 0) {
			PERROR("fcntl");
			goto error_fcntl;
		}
	}
	/* The write end of the pipe needs to be non-blocking */
	ret = fcntl(waitfd[1], F_SETFL, O_NONBLOCK);
	if (ret < 0) {
		PERROR("fcntl");
		goto error_fcntl;
	}
	memcpy(obj->wait_fd, waitfd, sizeof(waitfd));

	/* create shm */

	shmfd = stream_fd;
	ret = zero_file(shmfd, memory_map_size);
	if (ret) {
		PERROR("zero_file");
		goto error_zero_file;
	}
	ret = ftruncate(shmfd, memory_map_size);
	if (ret) {
		PERROR("ftruncate");
		goto error_ftruncate;
	}
	/*
	 * Also ensure the file metadata is synced with the storage by using
	 * fsync(2).
	 */
	ret = fsync(shmfd);
	if (ret) {
		PERROR("fsync");
		goto error_fsync;
	}
	obj->shm_fd_ownership = 0;
	obj->shm_fd = shmfd;

	/* memory_map: mmap */
	memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE,
			MAP_SHARED, shmfd, 0);
	if (memory_map == MAP_FAILED) {
		PERROR("mmap");
		goto error_mmap;
	}
	obj->type = SHM_OBJECT_SHM;
	obj->memory_map = memory_map;
	obj->memory_map_size = memory_map_size;
	obj->allocated_len = 0;
	obj->index = table->allocated_len++;

	return obj;

error_mmap:
error_fsync:
error_ftruncate:
error_zero_file:
error_fcntl:
	for (i = 0; i < 2; i++) {
		ret = close(waitfd[i]);
		if (ret) {
			PERROR("close");
			assert(0);
		}
	}
error_pipe:
	return NULL;
}

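/*
 * Allocate an object backed by plain (zeroed) heap memory, with a
 * wait_fd pipe but no shm_fd.
 */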
static
struct shm_object *_shm_object_table_alloc_mem(struct shm_object_table *table,
			size_t memory_map_size)
{
	struct shm_object *obj;
	void *memory_map;
	int waitfd[2], i, ret;

	if (table->allocated_len >= table->size)
		return NULL;
	obj = &table->objects[table->allocated_len];

	memory_map = zmalloc(memory_map_size);
	if (!memory_map)
		goto alloc_error;

	/* wait_fd: create pipe */
	ret = pipe(waitfd);
	if (ret < 0) {
		PERROR("pipe");
		goto error_pipe;
	}
	for (i = 0; i < 2; i++) {
		ret = fcntl(waitfd[i], F_SETFD, FD_CLOEXEC);
		if (ret < 0) {
			PERROR("fcntl");
			goto error_fcntl;
		}
	}
	/* The write end of the pipe needs to be non-blocking */
	ret = fcntl(waitfd[1], F_SETFL, O_NONBLOCK);
	if (ret < 0) {
		PERROR("fcntl");
		goto error_fcntl;
	}
	memcpy(obj->wait_fd, waitfd, sizeof(waitfd));

	/* no shm_fd */
	obj->shm_fd = -1;
	obj->shm_fd_ownership = 0;

	obj->type = SHM_OBJECT_MEM;
	obj->memory_map = memory_map;
	obj->memory_map_size = memory_map_size;
	obj->allocated_len = 0;
	obj->index = table->allocated_len++;

	return obj;

error_fcntl:
	for (i = 0; i < 2; i++) {
		ret = close(waitfd[i]);
		if (ret) {
			PERROR("close");
			assert(0);
		}
	}
error_pipe:
	free(memory_map);
alloc_error:
	return NULL;
}

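/*
 * Allocate a new shm object in the table. When built with libnuma and
 * cpu >= 0, allocations are preferred on the NUMA node owning that
 * cpu; otherwise (or if the node lookup fails) local allocation is
 * used. The previous preferred node is restored before returning.
 */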
struct shm_object *shm_object_table_alloc(struct shm_object_table *table,
			size_t memory_map_size,
			enum shm_object_type type,
			int stream_fd,
			int cpu)
{
	struct shm_object *shm_object;
#ifdef HAVE_LIBNUMA
	int oldnode, node;

	oldnode = numa_preferred();
	if (cpu >= 0) {
		node = numa_node_of_cpu(cpu);
		if (node >= 0)
			numa_set_preferred(node);
	}
	if (cpu < 0 || node < 0)
		numa_set_localalloc();
#endif /* HAVE_LIBNUMA */
	switch (type) {
	case SHM_OBJECT_SHM:
		shm_object = _shm_object_table_alloc_shm(table, memory_map_size,
				stream_fd);
		break;
	case SHM_OBJECT_MEM:
		shm_object = _shm_object_table_alloc_mem(table, memory_map_size);
		break;
	default:
		assert(0);
	}
#ifdef HAVE_LIBNUMA
	numa_set_preferred(oldnode);
#endif /* HAVE_LIBNUMA */
	return shm_object;
}

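/*
 * Map an already-created shm (shm_fd) and its wakeup_fd into the
 * table. Streams must be appended in sequential order; the object
 * takes ownership of shm_fd.
 */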
struct shm_object *shm_object_table_append_shm(struct shm_object_table *table,
		int shm_fd, int wakeup_fd, uint32_t stream_nr,
		size_t memory_map_size)
{
	struct shm_object *obj;
	char *memory_map;
	int ret;

	if (table->allocated_len >= table->size)
		return NULL;
	/* streams _must_ be received in sequential order, else fail. */
	if (stream_nr + 1 != table->allocated_len)
		return NULL;

	obj = &table->objects[table->allocated_len];

	/* wait_fd: set write end of the pipe. */
	obj->wait_fd[0] = -1;	/* read end is unset */
	obj->wait_fd[1] = wakeup_fd;
	obj->shm_fd = shm_fd;
	obj->shm_fd_ownership = 1;

	ret = fcntl(obj->wait_fd[1], F_SETFD, FD_CLOEXEC);
	if (ret < 0) {
		PERROR("fcntl");
		goto error_fcntl;
	}
	/* The write end of the pipe needs to be non-blocking */
	ret = fcntl(obj->wait_fd[1], F_SETFL, O_NONBLOCK);
	if (ret < 0) {
		PERROR("fcntl");
		goto error_fcntl;
	}

	/* memory_map: mmap */
	memory_map = mmap(NULL, memory_map_size, PROT_READ | PROT_WRITE,
			MAP_SHARED, shm_fd, 0);
	if (memory_map == MAP_FAILED) {
		PERROR("mmap");
		goto error_mmap;
	}
	obj->type = SHM_OBJECT_SHM;
	obj->memory_map = memory_map;
	obj->memory_map_size = memory_map_size;
	obj->allocated_len = memory_map_size;
	obj->index = table->allocated_len++;

	return obj;

error_fcntl:
error_mmap:
	return NULL;
}

/*
 * Pass ownership of mem to the object.
 */
struct shm_object *shm_object_table_append_mem(struct shm_object_table *table,
		void *mem, size_t memory_map_size, int wakeup_fd)
{
	struct shm_object *obj;
	int ret;

	if (table->allocated_len >= table->size)
		return NULL;
	obj = &table->objects[table->allocated_len];

	obj->wait_fd[0] = -1;	/* read end is unset */
	obj->wait_fd[1] = wakeup_fd;
	obj->shm_fd = -1;
	obj->shm_fd_ownership = 0;

	ret = fcntl(obj->wait_fd[1], F_SETFD, FD_CLOEXEC);
	if (ret < 0) {
		PERROR("fcntl");
		goto error_fcntl;
	}
	/* The write end of the pipe needs to be non-blocking */
	ret = fcntl(obj->wait_fd[1], F_SETFL, O_NONBLOCK);
	if (ret < 0) {
		PERROR("fcntl");
		goto error_fcntl;
	}

	obj->type = SHM_OBJECT_MEM;
	obj->memory_map = mem;
	obj->memory_map_size = memory_map_size;
	obj->allocated_len = memory_map_size;
	obj->index = table->allocated_len++;

	return obj;

error_fcntl:
	return NULL;
}

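/*
 * Tear down a single object: unmap or free its memory and close its
 * file descriptors. When called from the application (!consumer),
 * closed fds are also removed from the ust fd tracker.
 */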
static
void shmp_object_destroy(struct shm_object *obj, int consumer)
{
	switch (obj->type) {
	case SHM_OBJECT_SHM:
	{
		int ret, i;

		ret = munmap(obj->memory_map, obj->memory_map_size);
		if (ret) {
			PERROR("munmap");
			assert(0);
		}

		if (obj->shm_fd_ownership) {
			/* Delete FDs only if called from app (not consumer). */
			if (!consumer) {
				lttng_ust_lock_fd_tracker();
				ret = close(obj->shm_fd);
				if (!ret) {
					lttng_ust_delete_fd_from_tracker(obj->shm_fd);
				} else {
					PERROR("close");
					assert(0);
				}
				lttng_ust_unlock_fd_tracker();
			} else {
				ret = close(obj->shm_fd);
				if (ret) {
					PERROR("close");
					assert(0);
				}
			}
		}
		for (i = 0; i < 2; i++) {
			if (obj->wait_fd[i] < 0)
				continue;
			if (!consumer) {
				lttng_ust_lock_fd_tracker();
				ret = close(obj->wait_fd[i]);
				if (!ret) {
					lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]);
				} else {
					PERROR("close");
					assert(0);
				}
				lttng_ust_unlock_fd_tracker();
			} else {
				ret = close(obj->wait_fd[i]);
				if (ret) {
					PERROR("close");
					assert(0);
				}
			}
		}
		break;
	}
	case SHM_OBJECT_MEM:
	{
		int ret, i;

		for (i = 0; i < 2; i++) {
			if (obj->wait_fd[i] < 0)
				continue;
			if (!consumer) {
				lttng_ust_lock_fd_tracker();
				ret = close(obj->wait_fd[i]);
				if (!ret) {
					lttng_ust_delete_fd_from_tracker(obj->wait_fd[i]);
				} else {
					PERROR("close");
					assert(0);
				}
				lttng_ust_unlock_fd_tracker();
			} else {
				ret = close(obj->wait_fd[i]);
				if (ret) {
					PERROR("close");
					assert(0);
				}
			}
		}
		free(obj->memory_map);
		break;
	}
	default:
		assert(0);
	}
}

void shm_object_table_destroy(struct shm_object_table *table, int consumer)
{
	int i;

	for (i = 0; i < table->allocated_len; i++)
		shmp_object_destroy(&table->objects[i], consumer);
	free(table);
}

/*
 * zalloc_shm - allocate memory within a shm object.
 *
 * Shared memory is already zeroed, either by zero_file()/ftruncate()
 * (shm objects) or by zmalloc() (memory objects).
 * *NOT* multithread-safe (should be protected by mutex).
 * Returns a -1, -1 tuple on error.
 */
struct shm_ref zalloc_shm(struct shm_object *obj, size_t len)
{
	struct shm_ref ref;
	struct shm_ref shm_ref_error = { -1, -1 };

	if (obj->memory_map_size - obj->allocated_len < len)
		return shm_ref_error;
	ref.index = obj->index;
	ref.offset = obj->allocated_len;
	obj->allocated_len += len;
	return ref;
}

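/*
 * Bump the allocation cursor so that the next zalloc_shm() returns an
 * offset aligned on "align" bytes.
 */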
void align_shm(struct shm_object *obj, size_t align)
{
	size_t offset_len = offset_align(obj->allocated_len, align);
	obj->allocated_len += offset_len;
}
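
/*
 * Illustrative usage sketch (hypothetical "struct foo", not part of
 * this file): callers typically align the cursor before carving out a
 * chunk, then check the returned reference for the -1, -1 error tuple:
 *
 *	align_shm(obj, __alignof__(struct foo));
 *	ref = zalloc_shm(obj, sizeof(struct foo));
 *	if (ref.index < 0)
 *		...handle allocation failure...
 */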