Update tests/hello test-case layout (cleanup)
[lttng-ust.git] / libringbuffer / ring_buffer_frontend.c
CommitLineData
852c2936
MD
1/*
2 * ring_buffer_frontend.c
3 *
4 * (C) Copyright 2005-2010 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
5 *
6 * Ring buffer wait-free buffer synchronization. Producer-consumer and flight
7 * recorder (overwrite) modes. See thesis:
8 *
9 * Desnoyers, Mathieu (2009), "Low-Impact Operating System Tracing", Ph.D.
10 * dissertation, Ecole Polytechnique de Montreal.
11 * http://www.lttng.org/pub/thesis/desnoyers-dissertation-2009-12.pdf
12 *
13 * - Algorithm presentation in Chapter 5:
14 * "Lockless Multi-Core High-Throughput Buffering".
15 * - Algorithm formal verification in Section 8.6:
16 * "Formal verification of LTTng"
17 *
18 * Author:
19 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
20 *
21 * Inspired from LTT and RelayFS:
22 * Karim Yaghmour <karim@opersys.com>
23 * Tom Zanussi <zanussi@us.ibm.com>
24 * Bob Wisniewski <bob@watson.ibm.com>
25 * And from K42 :
26 * Bob Wisniewski <bob@watson.ibm.com>
27 *
28 * Buffer reader semantic :
29 *
30 * - get_subbuf_size
31 * while buffer is not finalized and empty
32 * - get_subbuf
33 * - if return value != 0, continue
34 * - splice one subbuffer worth of data to a pipe
35 * - splice the data from pipe to disk/network
36 * - put_subbuf
37 *
38 * Dual LGPL v2.1/GPL v2 license.
39 */
40
a6352fd4 41#include <sys/types.h>
431d5cf0
MD
42#include <sys/mman.h>
43#include <sys/stat.h>
44#include <fcntl.h>
14641deb 45#include <urcu/compiler.h>
a6352fd4 46#include <urcu/ref.h>
14641deb 47
a6352fd4 48#include "smp.h"
8d8a24c8 49#include <ust/ringbuffer-config.h>
4931a13e
MD
50#include "backend.h"
51#include "frontend.h"
a6352fd4 52#include "shm.h"
852c2936 53
431d5cf0
MD
/*
 * Fallback max(): yields the larger of @a and @b, defined only when no other
 * max is already provided. Each argument may be evaluated twice, so do not
 * pass expressions with side effects.
 */
#ifndef max
#define max(a, b) ((a) > (b) ? (a) : (b))
#endif
57
2432c3c9
MD
58/*
59 * Use POSIX SHM: shm_open(3) and shm_unlink(3).
60 * close(2) to close the fd returned by shm_open.
61 * shm_unlink releases the shared memory object name.
62 * ftruncate(2) sets the size of the memory object.
63 * mmap/munmap maps the shared memory obj to a virtual address in the
64 * calling process (should be done both in libust and consumer).
65 * See shm_overview(7) for details.
66 * Pass file descriptor returned by shm_open(3) to ltt-sessiond through
67 * a UNIX socket.
68 *
69 * Since we don't need to access the object using its name, we can
70 * immediately shm_unlink(3) it, and only keep the handle with its file
71 * descriptor.
72 */
73
852c2936
MD
/*
 * Offsets and flags describing one sub-buffer switch.
 * NOTE(review): the consumers of this structure are outside the visible part
 * of the file; field meanings below are taken from the names only.
 */
struct switch_offsets {
	unsigned long begin;
	unsigned long end;
	unsigned long old;
	size_t pre_header_padding;
	size_t size;
	/* One-bit flags, kept in their original declaration order. */
	unsigned int switch_new_start:1;
	unsigned int switch_new_end:1;
	unsigned int switch_old_start:1;
	unsigned int switch_old_end:1;
};
83
/*
 * Per-thread nesting counter for the ring buffer.
 * NOTE(review): not referenced in the visible part of this file; presumably
 * maintained by the frontend fast path — confirm against the frontend headers.
 */
__thread unsigned int lib_ring_buffer_nesting;
852c2936
MD
85
/* Forward declaration: called by lib_ring_buffer_free() before teardown. */
static
void lib_ring_buffer_print_errors(struct channel *chan,
				  struct lib_ring_buffer *buf, int cpu);
89
90/*
91 * Must be called under cpu hotplug protection.
92 */
93void lib_ring_buffer_free(struct lib_ring_buffer *buf)
94{
a6352fd4 95 struct channel *chan = shmp(buf->backend.chan);
852c2936
MD
96
97 lib_ring_buffer_print_errors(chan, buf, buf->backend.cpu);
431d5cf0
MD
98 /* buf->commit_hot will be freed by shm teardown */
99 /* buf->commit_cold will be freed by shm teardown */
852c2936
MD
100
101 lib_ring_buffer_backend_free(&buf->backend);
102}
103
104/**
105 * lib_ring_buffer_reset - Reset ring buffer to initial values.
106 * @buf: Ring buffer.
107 *
108 * Effectively empty the ring buffer. Should be called when the buffer is not
109 * used for writing. The ring buffer can be opened for reading, but the reader
110 * should not be using the iterator concurrently with reset. The previous
111 * current iterator record is reset.
112 */
113void lib_ring_buffer_reset(struct lib_ring_buffer *buf)
114{
a6352fd4 115 struct channel *chan = shmp(buf->backend.chan);
852c2936
MD
116 const struct lib_ring_buffer_config *config = chan->backend.config;
117 unsigned int i;
118
119 /*
120 * Reset iterator first. It will put the subbuffer if it currently holds
121 * it.
122 */
852c2936
MD
123 v_set(config, &buf->offset, 0);
124 for (i = 0; i < chan->backend.num_subbuf; i++) {
a6352fd4
MD
125 v_set(config, &shmp(buf->commit_hot)[i].cc, 0);
126 v_set(config, &shmp(buf->commit_hot)[i].seq, 0);
127 v_set(config, &shmp(buf->commit_cold)[i].cc_sb, 0);
852c2936 128 }
a6352fd4
MD
129 uatomic_set(&buf->consumed, 0);
130 uatomic_set(&buf->record_disabled, 0);
852c2936
MD
131 v_set(config, &buf->last_tsc, 0);
132 lib_ring_buffer_backend_reset(&buf->backend);
133 /* Don't reset number of active readers */
134 v_set(config, &buf->records_lost_full, 0);
135 v_set(config, &buf->records_lost_wrap, 0);
136 v_set(config, &buf->records_lost_big, 0);
137 v_set(config, &buf->records_count, 0);
138 v_set(config, &buf->records_overrun, 0);
139 buf->finalized = 0;
140}
852c2936
MD
141
/**
 * channel_reset - Reset channel to initial values.
 * @chan: Channel.
 *
 * Clears the channel's record_disabled flag and resets the channel backend.
 * Should be called when the channel is not used for writing.
 */
void channel_reset(struct channel *chan)
{
	uatomic_set(&chan->record_disabled, 0);
	/* Don't reset commit_count_mask, still valid */
	channel_backend_reset(&chan->backend);
	/* Don't reset switch/read timer interval */
	/* Don't reset notifiers and notifier enable bits */
	/* Don't reset reader reference count */
}
852c2936
MD
163
164/*
165 * Must be called under cpu hotplug protection.
166 */
167int lib_ring_buffer_create(struct lib_ring_buffer *buf,
a6352fd4
MD
168 struct channel_backend *chanb, int cpu,
169 struct shm_header *shm_header)
852c2936
MD
170{
171 const struct lib_ring_buffer_config *config = chanb->config;
14641deb 172 struct channel *chan = caa_container_of(chanb, struct channel, backend);
852c2936
MD
173 void *priv = chanb->priv;
174 unsigned int num_subbuf;
175 size_t subbuf_header_size;
176 u64 tsc;
177 int ret;
178
179 /* Test for cpu hotplug */
180 if (buf->backend.allocated)
181 return 0;
182
a6352fd4
MD
183 ret = lib_ring_buffer_backend_create(&buf->backend, &chan->backend,
184 cpu, shm_header);
852c2936
MD
185 if (ret)
186 return ret;
187
431d5cf0
MD
188 align_shm(shm_header,
189 max(__alignof__(struct commit_counters_hot),
190 __alignof__(struct commit_counters_cold)));
a6352fd4
MD
191 set_shmp(&buf->commit_hot,
192 zalloc_shm(shm_header,
193 sizeof(*buf->commit_hot) * chan->backend.num_subbuf));
194 if (!shmp(buf->commit_hot)) {
852c2936
MD
195 ret = -ENOMEM;
196 goto free_chanbuf;
197 }
198
431d5cf0 199 align_shm(shm_header, __alignof__(struct commit_counters_cold));
a6352fd4
MD
200 set_shmp(&buf->commit_cold,
201 zalloc_shm(shm_header,
202 sizeof(*buf->commit_cold) * chan->backend.num_subbuf));
203 if (!shmp(buf->commit_cold)) {
852c2936
MD
204 ret = -ENOMEM;
205 goto free_commit;
206 }
207
208 num_subbuf = chan->backend.num_subbuf;
a6352fd4 209 //init_waitqueue_head(&buf->read_wait);
852c2936
MD
210
211 /*
212 * Write the subbuffer header for first subbuffer so we know the total
213 * duration of data gathering.
214 */
215 subbuf_header_size = config->cb.subbuffer_header_size();
216 v_set(config, &buf->offset, subbuf_header_size);
a6352fd4
MD
217 subbuffer_id_clear_noref(config, &shmp(buf->backend.buf_wsb)[0].id);
218 tsc = config->cb.ring_buffer_clock_read(shmp(buf->backend.chan));
852c2936 219 config->cb.buffer_begin(buf, tsc, 0);
a6352fd4 220 v_add(config, subbuf_header_size, &shmp(buf->commit_hot)[0].cc);
852c2936
MD
221
222 if (config->cb.buffer_create) {
223 ret = config->cb.buffer_create(buf, priv, cpu, chanb->name);
224 if (ret)
225 goto free_init;
226 }
852c2936 227 buf->backend.allocated = 1;
852c2936
MD
228 return 0;
229
230 /* Error handling */
231free_init:
a6352fd4 232 /* commit_cold will be freed by shm teardown */
852c2936 233free_commit:
a6352fd4 234 /* commit_hot will be freed by shm teardown */
852c2936
MD
235free_chanbuf:
236 lib_ring_buffer_backend_free(&buf->backend);
237 return ret;
238}
239
240static void switch_buffer_timer(unsigned long data)
241{
242 struct lib_ring_buffer *buf = (struct lib_ring_buffer *)data;
a6352fd4 243 struct channel *chan = shmp(buf->backend.chan);
852c2936
MD
244 const struct lib_ring_buffer_config *config = chan->backend.config;
245
246 /*
247 * Only flush buffers periodically if readers are active.
248 */
a6352fd4 249 if (uatomic_read(&buf->active_readers))
852c2936
MD
250 lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE);
251
a6352fd4
MD
252 //TODO timers
253 //if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
254 // mod_timer_pinned(&buf->switch_timer,
255 // jiffies + chan->switch_timer_interval);
256 //else
257 // mod_timer(&buf->switch_timer,
258 // jiffies + chan->switch_timer_interval);
852c2936
MD
259}
260
852c2936
MD
261static void lib_ring_buffer_start_switch_timer(struct lib_ring_buffer *buf)
262{
a6352fd4 263 struct channel *chan = shmp(buf->backend.chan);
852c2936
MD
264 const struct lib_ring_buffer_config *config = chan->backend.config;
265
266 if (!chan->switch_timer_interval || buf->switch_timer_enabled)
267 return;
a6352fd4
MD
268 //TODO
269 //init_timer(&buf->switch_timer);
270 //buf->switch_timer.function = switch_buffer_timer;
271 //buf->switch_timer.expires = jiffies + chan->switch_timer_interval;
272 //buf->switch_timer.data = (unsigned long)buf;
273 //if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
274 // add_timer_on(&buf->switch_timer, buf->backend.cpu);
275 //else
276 // add_timer(&buf->switch_timer);
852c2936
MD
277 buf->switch_timer_enabled = 1;
278}
279
852c2936
MD
280static void lib_ring_buffer_stop_switch_timer(struct lib_ring_buffer *buf)
281{
a6352fd4 282 struct channel *chan = shmp(buf->backend.chan);
852c2936
MD
283
284 if (!chan->switch_timer_interval || !buf->switch_timer_enabled)
285 return;
286
a6352fd4
MD
287 //TODO
288 //del_timer_sync(&buf->switch_timer);
852c2936
MD
289 buf->switch_timer_enabled = 0;
290}
291
292/*
293 * Polling timer to check the channels for data.
294 */
295static void read_buffer_timer(unsigned long data)
296{
297 struct lib_ring_buffer *buf = (struct lib_ring_buffer *)data;
a6352fd4 298 struct channel *chan = shmp(buf->backend.chan);
852c2936
MD
299 const struct lib_ring_buffer_config *config = chan->backend.config;
300
301 CHAN_WARN_ON(chan, !buf->backend.allocated);
302
a6352fd4 303 if (uatomic_read(&buf->active_readers)
852c2936 304 && lib_ring_buffer_poll_deliver(config, buf, chan)) {
a6352fd4
MD
305 //TODO
306 //wake_up_interruptible(&buf->read_wait);
307 //wake_up_interruptible(&chan->read_wait);
852c2936
MD
308 }
309
a6352fd4
MD
310 //TODO
311 //if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
312 // mod_timer_pinned(&buf->read_timer,
313 // jiffies + chan->read_timer_interval);
314 //else
315 // mod_timer(&buf->read_timer,
316 // jiffies + chan->read_timer_interval);
852c2936
MD
317}
318
852c2936
MD
319static void lib_ring_buffer_start_read_timer(struct lib_ring_buffer *buf)
320{
a6352fd4 321 struct channel *chan = shmp(buf->backend.chan);
852c2936
MD
322 const struct lib_ring_buffer_config *config = chan->backend.config;
323
324 if (config->wakeup != RING_BUFFER_WAKEUP_BY_TIMER
325 || !chan->read_timer_interval
326 || buf->read_timer_enabled)
327 return;
328
a6352fd4
MD
329 //TODO
330 //init_timer(&buf->read_timer);
331 //buf->read_timer.function = read_buffer_timer;
332 //buf->read_timer.expires = jiffies + chan->read_timer_interval;
333 //buf->read_timer.data = (unsigned long)buf;
852c2936 334
a6352fd4
MD
335 //if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
336 // add_timer_on(&buf->read_timer, buf->backend.cpu);
337 //else
338 // add_timer(&buf->read_timer);
852c2936
MD
339 buf->read_timer_enabled = 1;
340}
341
852c2936
MD
342static void lib_ring_buffer_stop_read_timer(struct lib_ring_buffer *buf)
343{
a6352fd4 344 struct channel *chan = shmp(buf->backend.chan);
852c2936
MD
345 const struct lib_ring_buffer_config *config = chan->backend.config;
346
347 if (config->wakeup != RING_BUFFER_WAKEUP_BY_TIMER
348 || !chan->read_timer_interval
349 || !buf->read_timer_enabled)
350 return;
351
a6352fd4
MD
352 //TODO
353 //del_timer_sync(&buf->read_timer);
852c2936
MD
354 /*
355 * do one more check to catch data that has been written in the last
356 * timer period.
357 */
358 if (lib_ring_buffer_poll_deliver(config, buf, chan)) {
a6352fd4
MD
359 //TODO
360 //wake_up_interruptible(&buf->read_wait);
361 //wake_up_interruptible(&chan->read_wait);
852c2936
MD
362 }
363 buf->read_timer_enabled = 0;
364}
365
852c2936
MD
366static void channel_unregister_notifiers(struct channel *chan)
367{
368 const struct lib_ring_buffer_config *config = chan->backend.config;
369 int cpu;
370
852c2936 371 if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
852c2936 372 for_each_possible_cpu(cpu) {
a6352fd4
MD
373 struct lib_ring_buffer *buf = &shmp(chan->backend.buf)[cpu];
374
852c2936
MD
375 lib_ring_buffer_stop_switch_timer(buf);
376 lib_ring_buffer_stop_read_timer(buf);
377 }
852c2936 378 } else {
a6352fd4 379 struct lib_ring_buffer *buf = shmp(chan->backend.buf);
852c2936
MD
380
381 lib_ring_buffer_stop_switch_timer(buf);
382 lib_ring_buffer_stop_read_timer(buf);
383 }
8d8a24c8 384 //channel_backend_unregister_notifiers(&chan->backend);
852c2936
MD
385}
386
431d5cf0 387static void channel_free(struct shm_handle *handle)
852c2936 388{
431d5cf0
MD
389 struct shm_header *header = handle->header;
390 struct channel *chan = shmp(header->chan);
391 int ret;
392
852c2936 393 channel_backend_free(&chan->backend);
431d5cf0
MD
394 /* chan is freed by shm teardown */
395 ret = munmap(header, header->shm_size);
396 if (ret) {
397 PERROR("umnmap");
398 assert(0);
399 }
400 ret = close(handle->shmfd);
401 if (ret) {
402 PERROR("close");
403 assert(0);
404 }
852c2936
MD
405}
406
407/**
408 * channel_create - Create channel.
409 * @config: ring buffer instance configuration
410 * @name: name of the channel
411 * @priv: ring buffer client private data
412 * @buf_addr: pointer the the beginning of the preallocated buffer contiguous
413 * address mapping. It is used only by RING_BUFFER_STATIC
414 * configuration. It can be set to NULL for other backends.
415 * @subbuf_size: subbuffer size
416 * @num_subbuf: number of subbuffers
417 * @switch_timer_interval: Time interval (in us) to fill sub-buffers with
418 * padding to let readers get those sub-buffers.
419 * Used for live streaming.
420 * @read_timer_interval: Time interval (in us) to wake up pending readers.
421 *
422 * Holds cpu hotplug.
423 * Returns NULL on failure.
424 */
431d5cf0 425struct shm_handle *channel_create(const struct lib_ring_buffer_config *config,
852c2936
MD
426 const char *name, void *priv, void *buf_addr,
427 size_t subbuf_size,
428 size_t num_subbuf, unsigned int switch_timer_interval,
431d5cf0 429 unsigned int read_timer_interval)
852c2936 430{
431d5cf0 431 int ret, cpu, shmfd;
852c2936 432 struct channel *chan;
431d5cf0 433 size_t shmsize, bufshmsize, bufshmalign;
a6352fd4
MD
434 struct shm_header *shm_header;
435 unsigned long num_subbuf_alloc;
431d5cf0 436 struct shm_handle *handle;
852c2936
MD
437
438 if (lib_ring_buffer_check_config(config, switch_timer_interval,
439 read_timer_interval))
440 return NULL;
441
431d5cf0
MD
442 handle = zmalloc(sizeof(struct shm_handle));
443 if (!handle)
444 return NULL;
445
a6352fd4
MD
446 /* Calculate the shm allocation layout */
447 shmsize = sizeof(struct shm_header);
431d5cf0 448 shmsize += offset_align(shmsize, __alignof__(struct channel));
a6352fd4
MD
449 shmsize += sizeof(struct channel);
450
451 /* Per-cpu buffer size: control (prior to backend) */
431d5cf0 452 shmsize += offset_align(shmsize, __alignof__(struct lib_ring_buffer));
a6352fd4
MD
453 bufshmsize = sizeof(struct lib_ring_buffer);
454 shmsize += bufshmsize * num_possible_cpus();
455
456 /* Per-cpu buffer size: backend */
431d5cf0 457 shmsize += offset_align(shmsize, PAGE_SIZE);
a6352fd4
MD
458 /* num_subbuf + 1 is the worse case */
459 num_subbuf_alloc = num_subbuf + 1;
460 bufshmsize = sizeof(struct lib_ring_buffer_backend_pages *) * num_subbuf_alloc;
431d5cf0
MD
461 bufshmsize += offset_align(bufshmsize, PAGE_SIZE);
462 bufshmsize += subbuf_size * num_subbuf_alloc;
463 bufshmsize += offset_align(bufshmsize, __alignof__(struct lib_ring_buffer_backend_pages));
464 bufshmsize += sizeof(struct lib_ring_buffer_backend_pages) * num_subbuf_alloc;
465 bufshmsize += offset_align(bufshmsize, __alignof__(struct lib_ring_buffer_backend_subbuffer));
a6352fd4 466 bufshmsize += sizeof(struct lib_ring_buffer_backend_subbuffer) * num_subbuf;
431d5cf0 467 bufshmsize += offset_align(bufshmsize, PAGE_SIZE);
a6352fd4
MD
468 shmsize += bufshmsize * num_possible_cpus();
469
470 /* Per-cpu buffer size: control (after backend) */
431d5cf0
MD
471 shmsize += offset_align(shmsize,
472 max(__alignof__(struct commit_counters_hot),
473 __alignof__(struct commit_counters_cold)));
474 bufshmsize = sizeof(struct commit_counters_hot) * num_subbuf;
475 bufshmsize += offset_align(bufshmsize, __alignof__(struct commit_counters_cold));
a6352fd4 476 bufshmsize += sizeof(struct commit_counters_cold) * num_subbuf;
431d5cf0 477 shmsize += bufshmsize * num_possible_cpus();
a6352fd4 478
431d5cf0
MD
479 /*
480 * Allocate shm, and immediately unlink its shm oject, keeping
481 * only the file descriptor as a reference to the object. If it
482 * already exists (caused by short race window during which the
483 * global object exists in a concurrent shm_open), simply retry.
484 */
485 do {
486 shmfd = shm_open("/ust-shm-tmp",
487 O_CREAT | O_EXCL | O_RDWR, 0700);
488 } while (shmfd < 0 && errno == EEXIST);
489 if (shmfd < 0) {
490 PERROR("shm_open");
491 goto error_shm_open;
a6352fd4 492 }
431d5cf0
MD
493 ret = shm_unlink("/ust-shm-tmp");
494 if (ret) {
495 PERROR("shm_unlink");
496 goto error_unlink;
497 }
498 ret = ftruncate(shmfd, shmsize);
499 if (ret) {
500 PERROR("ftruncate");
501 goto error_ftruncate;
a6352fd4 502 }
852c2936 503
431d5cf0
MD
504 shm_header = mmap(NULL, shmsize, PROT_READ | PROT_WRITE,
505 MAP_SHARED, shmfd, 0);
506 if (shm_header == MAP_FAILED) {
507 PERROR("mmap");
508 goto error_mmap;
a6352fd4
MD
509 }
510
511 shm_header->magic = SHM_MAGIC;
512 shm_header->major = SHM_MAJOR;
513 shm_header->major = SHM_MINOR;
514 shm_header->bits_per_long = CAA_BITS_PER_LONG;
515 shm_header->shm_size = shmsize;
516 shm_header->shm_allocated = sizeof(struct shm_header);
517
431d5cf0 518 align_shm(shm_header, __alignof__(struct channel));
a6352fd4
MD
519 chan = zalloc_shm(shm_header, sizeof(struct channel));
520 if (!chan)
521 goto destroy_shmem;
522 set_shmp(shm_header->chan, chan);
523
524 ret = channel_backend_init(&chan->backend, name, config, priv,
525 subbuf_size, num_subbuf, shm_header);
852c2936 526 if (ret)
a6352fd4 527 goto destroy_shmem;
852c2936
MD
528
529 chan->commit_count_mask = (~0UL >> chan->backend.num_subbuf_order);
a6352fd4
MD
530 //TODO
531 //chan->switch_timer_interval = usecs_to_jiffies(switch_timer_interval);
532 //chan->read_timer_interval = usecs_to_jiffies(read_timer_interval);
a6352fd4
MD
533 //TODO
534 //init_waitqueue_head(&chan->read_wait);
535 //init_waitqueue_head(&chan->hp_wait);
852c2936
MD
536
537 if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
852c2936
MD
538 /*
539 * In case of non-hotplug cpu, if the ring-buffer is allocated
540 * in early initcall, it will not be notified of secondary cpus.
541 * In that off case, we need to allocate for all possible cpus.
542 */
852c2936 543 for_each_possible_cpu(cpu) {
a6352fd4 544 struct lib_ring_buffer *buf = &shmp(chan->backend.buf)[cpu];
852c2936
MD
545 lib_ring_buffer_start_switch_timer(buf);
546 lib_ring_buffer_start_read_timer(buf);
852c2936 547 }
852c2936 548 } else {
a6352fd4 549 struct lib_ring_buffer *buf = shmp(chan->backend.buf);
852c2936
MD
550
551 lib_ring_buffer_start_switch_timer(buf);
552 lib_ring_buffer_start_read_timer(buf);
553 }
554
431d5cf0
MD
555 handle->header = shm_header;
556 handle->shmfd = shmfd;
557 return handle;
852c2936 558
a6352fd4 559destroy_shmem:
431d5cf0
MD
560 ret = munmap(shm_header, shmsize);
561 if (ret) {
562 PERROR("umnmap");
563 assert(0);
a6352fd4 564 }
431d5cf0
MD
565error_mmap:
566error_ftruncate:
567error_unlink:
568 ret = close(shmfd);
569 if (ret) {
570 PERROR("close");
571 assert(0);
572 }
573error_shm_open:
574 free(handle);
852c2936
MD
575 return NULL;
576}
852c2936
MD
577
/*
 * channel_release - release the channel behind @handle.
 * Thin wrapper that forwards to channel_free().
 */
static
void channel_release(struct shm_handle *handle)
{
	channel_free(handle);
}
583
584/**
585 * channel_destroy - Finalize, wait for q.s. and destroy channel.
586 * @chan: channel to destroy
587 *
588 * Holds cpu hotplug.
431d5cf0
MD
589 * Call "destroy" callback, finalize channels, decrement the channel
590 * reference count. Note that when readers have completed data
591 * consumption of finalized channels, get_subbuf() will return -ENODATA.
592 * They should release their handle at that point. Returns the private
593 * data pointer.
852c2936 594 */
431d5cf0 595void *channel_destroy(struct shm_handle *handle)
852c2936 596{
431d5cf0
MD
597 struct shm_header *header = handle->header;
598 struct channel *chan = shmp(header->chan);
852c2936
MD
599 const struct lib_ring_buffer_config *config = chan->backend.config;
600 void *priv;
431d5cf0 601 int cpu;
852c2936
MD
602
603 channel_unregister_notifiers(chan);
604
605 if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
852c2936 606 for_each_channel_cpu(cpu, chan) {
a6352fd4 607 struct lib_ring_buffer *buf = &shmp(chan->backend.buf)[cpu];
852c2936
MD
608
609 if (config->cb.buffer_finalize)
610 config->cb.buffer_finalize(buf,
611 chan->backend.priv,
612 cpu);
613 if (buf->backend.allocated)
614 lib_ring_buffer_switch_slow(buf, SWITCH_FLUSH);
615 /*
616 * Perform flush before writing to finalized.
617 */
a6352fd4 618 cmm_smp_wmb();
14641deb 619 CMM_ACCESS_ONCE(buf->finalized) = 1;
a6352fd4 620 //wake_up_interruptible(&buf->read_wait);
852c2936
MD
621 }
622 } else {
a6352fd4 623 struct lib_ring_buffer *buf = shmp(chan->backend.buf);
852c2936
MD
624
625 if (config->cb.buffer_finalize)
626 config->cb.buffer_finalize(buf, chan->backend.priv, -1);
627 if (buf->backend.allocated)
628 lib_ring_buffer_switch_slow(buf, SWITCH_FLUSH);
629 /*
630 * Perform flush before writing to finalized.
631 */
a6352fd4 632 cmm_smp_wmb();
14641deb 633 CMM_ACCESS_ONCE(buf->finalized) = 1;
a6352fd4 634 //wake_up_interruptible(&buf->read_wait);
852c2936 635 }
14641deb 636 CMM_ACCESS_ONCE(chan->finalized) = 1;
a6352fd4
MD
637 //wake_up_interruptible(&chan->hp_wait);
638 //wake_up_interruptible(&chan->read_wait);
431d5cf0
MD
639 /*
640 * sessiond/consumer are keeping a reference on the shm file
641 * descriptor directly. No need to refcount.
642 */
643 channel_release(handle);
852c2936
MD
644 priv = chan->backend.priv;
645 return priv;
646}
852c2936
MD
647
648struct lib_ring_buffer *channel_get_ring_buffer(
649 const struct lib_ring_buffer_config *config,
650 struct channel *chan, int cpu)
651{
652 if (config->alloc == RING_BUFFER_ALLOC_GLOBAL)
a6352fd4 653 return shmp(chan->backend.buf);
852c2936 654 else
a6352fd4 655 return &shmp(chan->backend.buf)[cpu];
852c2936 656}
852c2936
MD
657
658int lib_ring_buffer_open_read(struct lib_ring_buffer *buf)
659{
a6352fd4 660 struct channel *chan = shmp(buf->backend.chan);
852c2936 661
a6352fd4 662 if (uatomic_cmpxchg(&buf->active_readers, 0, 1) != 0)
852c2936 663 return -EBUSY;
a6352fd4 664 cmm_smp_mb();
852c2936
MD
665 return 0;
666}
852c2936
MD
667
/*
 * lib_ring_buffer_release_read - give up the reader slot of a ring buffer.
 * @buf: ring buffer
 *
 * Warns if the active reader count is not exactly 1, then decrements it after
 * a full memory barrier.
 */
void lib_ring_buffer_release_read(struct lib_ring_buffer *buf)
{
	struct channel *chan = shmp(buf->backend.chan);

	CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1);
	cmm_smp_mb();
	uatomic_dec(&buf->active_readers);
}
676
677/**
678 * lib_ring_buffer_snapshot - save subbuffer position snapshot (for read)
679 * @buf: ring buffer
680 * @consumed: consumed count indicating the position where to read
681 * @produced: produced count, indicates position when to stop reading
682 *
683 * Returns -ENODATA if buffer is finalized, -EAGAIN if there is currently no
684 * data to read at consumed position, or 0 if the get operation succeeds.
852c2936
MD
685 */
686
687int lib_ring_buffer_snapshot(struct lib_ring_buffer *buf,
688 unsigned long *consumed, unsigned long *produced)
689{
a6352fd4 690 struct channel *chan = shmp(buf->backend.chan);
852c2936
MD
691 const struct lib_ring_buffer_config *config = chan->backend.config;
692 unsigned long consumed_cur, write_offset;
693 int finalized;
694
14641deb 695 finalized = CMM_ACCESS_ONCE(buf->finalized);
852c2936
MD
696 /*
697 * Read finalized before counters.
698 */
a6352fd4
MD
699 cmm_smp_rmb();
700 consumed_cur = uatomic_read(&buf->consumed);
852c2936
MD
701 /*
702 * No need to issue a memory barrier between consumed count read and
703 * write offset read, because consumed count can only change
704 * concurrently in overwrite mode, and we keep a sequence counter
705 * identifier derived from the write offset to check we are getting
706 * the same sub-buffer we are expecting (the sub-buffers are atomically
707 * "tagged" upon writes, tags are checked upon read).
708 */
709 write_offset = v_read(config, &buf->offset);
710
711 /*
712 * Check that we are not about to read the same subbuffer in
713 * which the writer head is.
714 */
715 if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed_cur, chan)
716 == 0)
717 goto nodata;
718
719 *consumed = consumed_cur;
720 *produced = subbuf_trunc(write_offset, chan);
721
722 return 0;
723
724nodata:
725 /*
726 * The memory barriers __wait_event()/wake_up_interruptible() take care
727 * of "raw_spin_is_locked" memory ordering.
728 */
729 if (finalized)
730 return -ENODATA;
852c2936
MD
731 else
732 return -EAGAIN;
733}
852c2936
MD
734
735/**
736 * lib_ring_buffer_put_snapshot - move consumed counter forward
737 * @buf: ring buffer
738 * @consumed_new: new consumed count value
739 */
740void lib_ring_buffer_move_consumer(struct lib_ring_buffer *buf,
741 unsigned long consumed_new)
742{
743 struct lib_ring_buffer_backend *bufb = &buf->backend;
a6352fd4 744 struct channel *chan = shmp(bufb->chan);
852c2936
MD
745 unsigned long consumed;
746
a6352fd4 747 CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1);
852c2936
MD
748
749 /*
750 * Only push the consumed value forward.
751 * If the consumed cmpxchg fails, this is because we have been pushed by
752 * the writer in flight recorder mode.
753 */
a6352fd4 754 consumed = uatomic_read(&buf->consumed);
852c2936 755 while ((long) consumed - (long) consumed_new < 0)
a6352fd4
MD
756 consumed = uatomic_cmpxchg(&buf->consumed, consumed,
757 consumed_new);
852c2936 758}
852c2936
MD
759
760/**
761 * lib_ring_buffer_get_subbuf - get exclusive access to subbuffer for reading
762 * @buf: ring buffer
763 * @consumed: consumed count indicating the position where to read
764 *
765 * Returns -ENODATA if buffer is finalized, -EAGAIN if there is currently no
766 * data to read at consumed position, or 0 if the get operation succeeds.
852c2936
MD
767 */
768int lib_ring_buffer_get_subbuf(struct lib_ring_buffer *buf,
769 unsigned long consumed)
770{
a6352fd4 771 struct channel *chan = shmp(buf->backend.chan);
852c2936
MD
772 const struct lib_ring_buffer_config *config = chan->backend.config;
773 unsigned long consumed_cur, consumed_idx, commit_count, write_offset;
774 int ret;
775 int finalized;
776
777retry:
14641deb 778 finalized = CMM_ACCESS_ONCE(buf->finalized);
852c2936
MD
779 /*
780 * Read finalized before counters.
781 */
a6352fd4
MD
782 cmm_smp_rmb();
783 consumed_cur = uatomic_read(&buf->consumed);
852c2936 784 consumed_idx = subbuf_index(consumed, chan);
a6352fd4 785 commit_count = v_read(config, &shmp(buf->commit_cold)[consumed_idx].cc_sb);
852c2936
MD
786 /*
787 * Make sure we read the commit count before reading the buffer
788 * data and the write offset. Correct consumed offset ordering
789 * wrt commit count is insured by the use of cmpxchg to update
790 * the consumed offset.
852c2936 791 */
a6352fd4
MD
792 /*
793 * Local rmb to match the remote wmb to read the commit count
794 * before the buffer data and the write offset.
795 */
796 cmm_smp_rmb();
852c2936
MD
797
798 write_offset = v_read(config, &buf->offset);
799
800 /*
801 * Check that the buffer we are getting is after or at consumed_cur
802 * position.
803 */
804 if ((long) subbuf_trunc(consumed, chan)
805 - (long) subbuf_trunc(consumed_cur, chan) < 0)
806 goto nodata;
807
808 /*
809 * Check that the subbuffer we are trying to consume has been
810 * already fully committed.
811 */
812 if (((commit_count - chan->backend.subbuf_size)
813 & chan->commit_count_mask)
814 - (buf_trunc(consumed_cur, chan)
815 >> chan->backend.num_subbuf_order)
816 != 0)
817 goto nodata;
818
819 /*
820 * Check that we are not about to read the same subbuffer in
821 * which the writer head is.
822 */
823 if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed_cur, chan)
824 == 0)
825 goto nodata;
826
827 /*
828 * Failure to get the subbuffer causes a busy-loop retry without going
829 * to a wait queue. These are caused by short-lived race windows where
830 * the writer is getting access to a subbuffer we were trying to get
831 * access to. Also checks that the "consumed" buffer count we are
832 * looking for matches the one contained in the subbuffer id.
833 */
834 ret = update_read_sb_index(config, &buf->backend, &chan->backend,
835 consumed_idx, buf_trunc_val(consumed, chan));
836 if (ret)
837 goto retry;
838 subbuffer_id_clear_noref(config, &buf->backend.buf_rsb.id);
839
840 buf->get_subbuf_consumed = consumed;
841 buf->get_subbuf = 1;
842
843 return 0;
844
845nodata:
846 /*
847 * The memory barriers __wait_event()/wake_up_interruptible() take care
848 * of "raw_spin_is_locked" memory ordering.
849 */
850 if (finalized)
851 return -ENODATA;
852c2936
MD
852 else
853 return -EAGAIN;
854}
852c2936
MD
855
856/**
857 * lib_ring_buffer_put_subbuf - release exclusive subbuffer access
858 * @buf: ring buffer
859 */
860void lib_ring_buffer_put_subbuf(struct lib_ring_buffer *buf)
861{
862 struct lib_ring_buffer_backend *bufb = &buf->backend;
a6352fd4 863 struct channel *chan = shmp(bufb->chan);
852c2936
MD
864 const struct lib_ring_buffer_config *config = chan->backend.config;
865 unsigned long read_sb_bindex, consumed_idx, consumed;
866
a6352fd4 867 CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1);
852c2936
MD
868
869 if (!buf->get_subbuf) {
870 /*
871 * Reader puts a subbuffer it did not get.
872 */
873 CHAN_WARN_ON(chan, 1);
874 return;
875 }
876 consumed = buf->get_subbuf_consumed;
877 buf->get_subbuf = 0;
878
879 /*
880 * Clear the records_unread counter. (overruns counter)
881 * Can still be non-zero if a file reader simply grabbed the data
882 * without using iterators.
883 * Can be below zero if an iterator is used on a snapshot more than
884 * once.
885 */
886 read_sb_bindex = subbuffer_id_get_index(config, bufb->buf_rsb.id);
887 v_add(config, v_read(config,
a6352fd4 888 &shmp(bufb->array)[read_sb_bindex]->records_unread),
852c2936 889 &bufb->records_read);
a6352fd4 890 v_set(config, &shmp(bufb->array)[read_sb_bindex]->records_unread, 0);
852c2936
MD
891 CHAN_WARN_ON(chan, config->mode == RING_BUFFER_OVERWRITE
892 && subbuffer_id_is_noref(config, bufb->buf_rsb.id));
893 subbuffer_id_set_noref(config, &bufb->buf_rsb.id);
894
895 /*
896 * Exchange the reader subbuffer with the one we put in its place in the
897 * writer subbuffer table. Expect the original consumed count. If
898 * update_read_sb_index fails, this is because the writer updated the
899 * subbuffer concurrently. We should therefore keep the subbuffer we
900 * currently have: it has become invalid to try reading this sub-buffer
901 * consumed count value anyway.
902 */
903 consumed_idx = subbuf_index(consumed, chan);
904 update_read_sb_index(config, &buf->backend, &chan->backend,
905 consumed_idx, buf_trunc_val(consumed, chan));
906 /*
907 * update_read_sb_index return value ignored. Don't exchange sub-buffer
908 * if the writer concurrently updated it.
909 */
910}
852c2936
MD
911
912/*
913 * cons_offset is an iterator on all subbuffer offsets between the reader
914 * position and the writer position. (inclusive)
915 */
916static
917void lib_ring_buffer_print_subbuffer_errors(struct lib_ring_buffer *buf,
918 struct channel *chan,
919 unsigned long cons_offset,
920 int cpu)
921{
922 const struct lib_ring_buffer_config *config = chan->backend.config;
923 unsigned long cons_idx, commit_count, commit_count_sb;
924
925 cons_idx = subbuf_index(cons_offset, chan);
a6352fd4
MD
926 commit_count = v_read(config, &shmp(buf->commit_hot)[cons_idx].cc);
927 commit_count_sb = v_read(config, &shmp(buf->commit_cold)[cons_idx].cc_sb);
852c2936
MD
928
929 if (subbuf_offset(commit_count, chan) != 0)
a6352fd4 930 ERRMSG("ring buffer %s, cpu %d: "
852c2936
MD
931 "commit count in subbuffer %lu,\n"
932 "expecting multiples of %lu bytes\n"
933 " [ %lu bytes committed, %lu bytes reader-visible ]\n",
934 chan->backend.name, cpu, cons_idx,
935 chan->backend.subbuf_size,
936 commit_count, commit_count_sb);
937
a6352fd4 938 ERRMSG("ring buffer: %s, cpu %d: %lu bytes committed\n",
852c2936
MD
939 chan->backend.name, cpu, commit_count);
940}
941
942static
943void lib_ring_buffer_print_buffer_errors(struct lib_ring_buffer *buf,
944 struct channel *chan,
945 void *priv, int cpu)
946{
947 const struct lib_ring_buffer_config *config = chan->backend.config;
948 unsigned long write_offset, cons_offset;
949
950 /*
951 * Can be called in the error path of allocation when
952 * trans_channel_data is not yet set.
953 */
954 if (!chan)
955 return;
956 /*
957 * No need to order commit_count, write_offset and cons_offset reads
958 * because we execute at teardown when no more writer nor reader
959 * references are left.
960 */
961 write_offset = v_read(config, &buf->offset);
a6352fd4 962 cons_offset = uatomic_read(&buf->consumed);
852c2936 963 if (write_offset != cons_offset)
a6352fd4 964 ERRMSG("ring buffer %s, cpu %d: "
852c2936
MD
965 "non-consumed data\n"
966 " [ %lu bytes written, %lu bytes read ]\n",
967 chan->backend.name, cpu, write_offset, cons_offset);
968
a6352fd4 969 for (cons_offset = uatomic_read(&buf->consumed);
852c2936
MD
970 (long) (subbuf_trunc((unsigned long) v_read(config, &buf->offset),
971 chan)
972 - cons_offset) > 0;
973 cons_offset = subbuf_align(cons_offset, chan))
974 lib_ring_buffer_print_subbuffer_errors(buf, chan, cons_offset,
975 cpu);
976}
977
978static
979void lib_ring_buffer_print_errors(struct channel *chan,
980 struct lib_ring_buffer *buf, int cpu)
981{
982 const struct lib_ring_buffer_config *config = chan->backend.config;
983 void *priv = chan->backend.priv;
984
a6352fd4 985 ERRMSG("ring buffer %s, cpu %d: %lu records written, "
852c2936
MD
986 "%lu records overrun\n",
987 chan->backend.name, cpu,
988 v_read(config, &buf->records_count),
989 v_read(config, &buf->records_overrun));
990
991 if (v_read(config, &buf->records_lost_full)
992 || v_read(config, &buf->records_lost_wrap)
993 || v_read(config, &buf->records_lost_big))
a6352fd4 994 ERRMSG("ring buffer %s, cpu %d: records were lost. Caused by:\n"
852c2936
MD
995 " [ %lu buffer full, %lu nest buffer wrap-around, "
996 "%lu event too big ]\n",
997 chan->backend.name, cpu,
998 v_read(config, &buf->records_lost_full),
999 v_read(config, &buf->records_lost_wrap),
1000 v_read(config, &buf->records_lost_big));
1001
1002 lib_ring_buffer_print_buffer_errors(buf, chan, priv, cpu);
1003}
1004
1005/*
1006 * lib_ring_buffer_switch_old_start: Populate old subbuffer header.
1007 *
1008 * Only executed when the buffer is finalized, in SWITCH_FLUSH.
1009 */
1010static
1011void lib_ring_buffer_switch_old_start(struct lib_ring_buffer *buf,
1012 struct channel *chan,
1013 struct switch_offsets *offsets,
1014 u64 tsc)
1015{
1016 const struct lib_ring_buffer_config *config = chan->backend.config;
1017 unsigned long oldidx = subbuf_index(offsets->old, chan);
1018 unsigned long commit_count;
1019
1020 config->cb.buffer_begin(buf, tsc, oldidx);
1021
1022 /*
1023 * Order all writes to buffer before the commit count update that will
1024 * determine that the subbuffer is full.
1025 */
a6352fd4 1026 cmm_smp_wmb();
852c2936 1027 v_add(config, config->cb.subbuffer_header_size(),
a6352fd4
MD
1028 &shmp(buf->commit_hot)[oldidx].cc);
1029 commit_count = v_read(config, &shmp(buf->commit_hot)[oldidx].cc);
852c2936
MD
1030 /* Check if the written buffer has to be delivered */
1031 lib_ring_buffer_check_deliver(config, buf, chan, offsets->old,
1032 commit_count, oldidx);
1033 lib_ring_buffer_write_commit_counter(config, buf, chan, oldidx,
1034 offsets->old, commit_count,
1035 config->cb.subbuffer_header_size());
1036}
1037
1038/*
1039 * lib_ring_buffer_switch_old_end: switch old subbuffer
1040 *
1041 * Note : offset_old should never be 0 here. It is ok, because we never perform
1042 * buffer switch on an empty subbuffer in SWITCH_ACTIVE mode. The caller
1043 * increments the offset_old value when doing a SWITCH_FLUSH on an empty
1044 * subbuffer.
1045 */
1046static
1047void lib_ring_buffer_switch_old_end(struct lib_ring_buffer *buf,
1048 struct channel *chan,
1049 struct switch_offsets *offsets,
1050 u64 tsc)
1051{
1052 const struct lib_ring_buffer_config *config = chan->backend.config;
1053 unsigned long oldidx = subbuf_index(offsets->old - 1, chan);
1054 unsigned long commit_count, padding_size, data_size;
1055
1056 data_size = subbuf_offset(offsets->old - 1, chan) + 1;
1057 padding_size = chan->backend.subbuf_size - data_size;
1058 subbuffer_set_data_size(config, &buf->backend, oldidx, data_size);
1059
1060 /*
1061 * Order all writes to buffer before the commit count update that will
1062 * determine that the subbuffer is full.
1063 */
a6352fd4
MD
1064 cmm_smp_wmb();
1065 v_add(config, padding_size, &shmp(buf->commit_hot)[oldidx].cc);
1066 commit_count = v_read(config, &shmp(buf->commit_hot)[oldidx].cc);
852c2936
MD
1067 lib_ring_buffer_check_deliver(config, buf, chan, offsets->old - 1,
1068 commit_count, oldidx);
1069 lib_ring_buffer_write_commit_counter(config, buf, chan, oldidx,
1070 offsets->old, commit_count,
1071 padding_size);
1072}
1073
1074/*
1075 * lib_ring_buffer_switch_new_start: Populate new subbuffer.
1076 *
1077 * This code can be executed unordered : writers may already have written to the
1078 * sub-buffer before this code gets executed, caution. The commit makes sure
1079 * that this code is executed before the deliver of this sub-buffer.
1080 */
1081static
1082void lib_ring_buffer_switch_new_start(struct lib_ring_buffer *buf,
1083 struct channel *chan,
1084 struct switch_offsets *offsets,
1085 u64 tsc)
1086{
1087 const struct lib_ring_buffer_config *config = chan->backend.config;
1088 unsigned long beginidx = subbuf_index(offsets->begin, chan);
1089 unsigned long commit_count;
1090
1091 config->cb.buffer_begin(buf, tsc, beginidx);
1092
1093 /*
1094 * Order all writes to buffer before the commit count update that will
1095 * determine that the subbuffer is full.
1096 */
a6352fd4 1097 cmm_smp_wmb();
852c2936 1098 v_add(config, config->cb.subbuffer_header_size(),
a6352fd4
MD
1099 &shmp(buf->commit_hot)[beginidx].cc);
1100 commit_count = v_read(config, &shmp(buf->commit_hot)[beginidx].cc);
852c2936
MD
1101 /* Check if the written buffer has to be delivered */
1102 lib_ring_buffer_check_deliver(config, buf, chan, offsets->begin,
1103 commit_count, beginidx);
1104 lib_ring_buffer_write_commit_counter(config, buf, chan, beginidx,
1105 offsets->begin, commit_count,
1106 config->cb.subbuffer_header_size());
1107}
1108
1109/*
1110 * lib_ring_buffer_switch_new_end: finish switching current subbuffer
1111 *
1112 * The only remaining threads could be the ones with pending commits. They will
1113 * have to do the deliver themselves.
1114 */
1115static
1116void lib_ring_buffer_switch_new_end(struct lib_ring_buffer *buf,
1117 struct channel *chan,
1118 struct switch_offsets *offsets,
1119 u64 tsc)
1120{
1121 const struct lib_ring_buffer_config *config = chan->backend.config;
1122 unsigned long endidx = subbuf_index(offsets->end - 1, chan);
1123 unsigned long commit_count, padding_size, data_size;
1124
1125 data_size = subbuf_offset(offsets->end - 1, chan) + 1;
1126 padding_size = chan->backend.subbuf_size - data_size;
1127 subbuffer_set_data_size(config, &buf->backend, endidx, data_size);
1128
1129 /*
1130 * Order all writes to buffer before the commit count update that will
1131 * determine that the subbuffer is full.
1132 */
a6352fd4
MD
1133 cmm_smp_wmb();
1134 v_add(config, padding_size, &shmp(buf->commit_hot)[endidx].cc);
1135 commit_count = v_read(config, &shmp(buf->commit_hot)[endidx].cc);
852c2936
MD
1136 lib_ring_buffer_check_deliver(config, buf, chan, offsets->end - 1,
1137 commit_count, endidx);
1138 lib_ring_buffer_write_commit_counter(config, buf, chan, endidx,
1139 offsets->end, commit_count,
1140 padding_size);
1141}
1142
1143/*
1144 * Returns :
1145 * 0 if ok
1146 * !0 if execution must be aborted.
1147 */
1148static
1149int lib_ring_buffer_try_switch_slow(enum switch_mode mode,
1150 struct lib_ring_buffer *buf,
1151 struct channel *chan,
1152 struct switch_offsets *offsets,
1153 u64 *tsc)
1154{
1155 const struct lib_ring_buffer_config *config = chan->backend.config;
1156 unsigned long off;
1157
1158 offsets->begin = v_read(config, &buf->offset);
1159 offsets->old = offsets->begin;
1160 offsets->switch_old_start = 0;
1161 off = subbuf_offset(offsets->begin, chan);
1162
1163 *tsc = config->cb.ring_buffer_clock_read(chan);
1164
1165 /*
1166 * Ensure we flush the header of an empty subbuffer when doing the
1167 * finalize (SWITCH_FLUSH). This ensures that we end up knowing the
1168 * total data gathering duration even if there were no records saved
1169 * after the last buffer switch.
1170 * In SWITCH_ACTIVE mode, switch the buffer when it contains events.
1171 * SWITCH_ACTIVE only flushes the current subbuffer, dealing with end of
1172 * subbuffer header as appropriate.
1173 * The next record that reserves space will be responsible for
1174 * populating the following subbuffer header. We choose not to populate
1175 * the next subbuffer header here because we want to be able to use
a6352fd4
MD
1176 * SWITCH_ACTIVE for periodical buffer flush, which must
1177 * guarantee that all the buffer content (records and header
1178 * timestamps) are visible to the reader. This is required for
1179 * quiescence guarantees for the fusion merge.
852c2936
MD
1180 */
1181 if (mode == SWITCH_FLUSH || off > 0) {
1182 if (unlikely(off == 0)) {
1183 /*
1184 * The client does not save any header information.
1185 * Don't switch empty subbuffer on finalize, because it
1186 * is invalid to deliver a completely empty subbuffer.
1187 */
1188 if (!config->cb.subbuffer_header_size())
1189 return -1;
1190 /*
1191 * Need to write the subbuffer start header on finalize.
1192 */
1193 offsets->switch_old_start = 1;
1194 }
1195 offsets->begin = subbuf_align(offsets->begin, chan);
1196 } else
1197 return -1; /* we do not have to switch : buffer is empty */
1198 /* Note: old points to the next subbuf at offset 0 */
1199 offsets->end = offsets->begin;
1200 return 0;
1201}
1202
1203/*
1204 * Force a sub-buffer switch. This operation is completely reentrant : can be
1205 * called while tracing is active with absolutely no lock held.
1206 *
1207 * Note, however, that as a v_cmpxchg is used for some atomic
1208 * operations, this function must be called from the CPU which owns the buffer
1209 * for a ACTIVE flush.
1210 */
1211void lib_ring_buffer_switch_slow(struct lib_ring_buffer *buf, enum switch_mode mode)
1212{
a6352fd4 1213 struct channel *chan = shmp(buf->backend.chan);
852c2936
MD
1214 const struct lib_ring_buffer_config *config = chan->backend.config;
1215 struct switch_offsets offsets;
1216 unsigned long oldidx;
1217 u64 tsc;
1218
1219 offsets.size = 0;
1220
1221 /*
1222 * Perform retryable operations.
1223 */
1224 do {
1225 if (lib_ring_buffer_try_switch_slow(mode, buf, chan, &offsets,
1226 &tsc))
1227 return; /* Switch not needed */
1228 } while (v_cmpxchg(config, &buf->offset, offsets.old, offsets.end)
1229 != offsets.old);
1230
1231 /*
1232 * Atomically update last_tsc. This update races against concurrent
1233 * atomic updates, but the race will always cause supplementary full TSC
1234 * records, never the opposite (missing a full TSC record when it would
1235 * be needed).
1236 */
1237 save_last_tsc(config, buf, tsc);
1238
1239 /*
1240 * Push the reader if necessary
1241 */
1242 lib_ring_buffer_reserve_push_reader(buf, chan, offsets.old);
1243
1244 oldidx = subbuf_index(offsets.old, chan);
1245 lib_ring_buffer_clear_noref(config, &buf->backend, oldidx);
1246
1247 /*
1248 * May need to populate header start on SWITCH_FLUSH.
1249 */
1250 if (offsets.switch_old_start) {
1251 lib_ring_buffer_switch_old_start(buf, chan, &offsets, tsc);
1252 offsets.old += config->cb.subbuffer_header_size();
1253 }
1254
1255 /*
1256 * Switch old subbuffer.
1257 */
1258 lib_ring_buffer_switch_old_end(buf, chan, &offsets, tsc);
1259}
852c2936
MD
1260
1261/*
1262 * Returns :
1263 * 0 if ok
1264 * -ENOSPC if event size is too large for packet.
1265 * -ENOBUFS if there is currently not enough space in buffer for the event.
1266 * -EIO if data cannot be written into the buffer for any other reason.
1267 */
1268static
1269int lib_ring_buffer_try_reserve_slow(struct lib_ring_buffer *buf,
1270 struct channel *chan,
1271 struct switch_offsets *offsets,
1272 struct lib_ring_buffer_ctx *ctx)
1273{
1274 const struct lib_ring_buffer_config *config = chan->backend.config;
1275 unsigned long reserve_commit_diff;
1276
1277 offsets->begin = v_read(config, &buf->offset);
1278 offsets->old = offsets->begin;
1279 offsets->switch_new_start = 0;
1280 offsets->switch_new_end = 0;
1281 offsets->switch_old_end = 0;
1282 offsets->pre_header_padding = 0;
1283
1284 ctx->tsc = config->cb.ring_buffer_clock_read(chan);
1285 if ((int64_t) ctx->tsc == -EIO)
1286 return -EIO;
1287
1288 if (last_tsc_overflow(config, buf, ctx->tsc))
1289 ctx->rflags |= RING_BUFFER_RFLAG_FULL_TSC;
1290
1291 if (unlikely(subbuf_offset(offsets->begin, ctx->chan) == 0)) {
1292 offsets->switch_new_start = 1; /* For offsets->begin */
1293 } else {
1294 offsets->size = config->cb.record_header_size(config, chan,
1295 offsets->begin,
1296 &offsets->pre_header_padding,
1297 ctx);
1298 offsets->size +=
1299 lib_ring_buffer_align(offsets->begin + offsets->size,
1300 ctx->largest_align)
1301 + ctx->data_size;
1302 if (unlikely(subbuf_offset(offsets->begin, chan) +
1303 offsets->size > chan->backend.subbuf_size)) {
1304 offsets->switch_old_end = 1; /* For offsets->old */
1305 offsets->switch_new_start = 1; /* For offsets->begin */
1306 }
1307 }
1308 if (unlikely(offsets->switch_new_start)) {
1309 unsigned long sb_index;
1310
1311 /*
1312 * We are typically not filling the previous buffer completely.
1313 */
1314 if (likely(offsets->switch_old_end))
1315 offsets->begin = subbuf_align(offsets->begin, chan);
1316 offsets->begin = offsets->begin
1317 + config->cb.subbuffer_header_size();
1318 /* Test new buffer integrity */
1319 sb_index = subbuf_index(offsets->begin, chan);
1320 reserve_commit_diff =
1321 (buf_trunc(offsets->begin, chan)
1322 >> chan->backend.num_subbuf_order)
1323 - ((unsigned long) v_read(config,
a6352fd4 1324 &shmp(buf->commit_cold)[sb_index].cc_sb)
852c2936
MD
1325 & chan->commit_count_mask);
1326 if (likely(reserve_commit_diff == 0)) {
1327 /* Next subbuffer not being written to. */
1328 if (unlikely(config->mode != RING_BUFFER_OVERWRITE &&
1329 subbuf_trunc(offsets->begin, chan)
1330 - subbuf_trunc((unsigned long)
a6352fd4 1331 uatomic_read(&buf->consumed), chan)
852c2936
MD
1332 >= chan->backend.buf_size)) {
1333 /*
1334 * We do not overwrite non consumed buffers
1335 * and we are full : record is lost.
1336 */
1337 v_inc(config, &buf->records_lost_full);
1338 return -ENOBUFS;
1339 } else {
1340 /*
1341 * Next subbuffer not being written to, and we
1342 * are either in overwrite mode or the buffer is
1343 * not full. It's safe to write in this new
1344 * subbuffer.
1345 */
1346 }
1347 } else {
1348 /*
1349 * Next subbuffer reserve offset does not match the
1350 * commit offset. Drop record in producer-consumer and
1351 * overwrite mode. Caused by either a writer OOPS or too
1352 * many nested writes over a reserve/commit pair.
1353 */
1354 v_inc(config, &buf->records_lost_wrap);
1355 return -EIO;
1356 }
1357 offsets->size =
1358 config->cb.record_header_size(config, chan,
1359 offsets->begin,
1360 &offsets->pre_header_padding,
1361 ctx);
1362 offsets->size +=
1363 lib_ring_buffer_align(offsets->begin + offsets->size,
1364 ctx->largest_align)
1365 + ctx->data_size;
1366 if (unlikely(subbuf_offset(offsets->begin, chan)
1367 + offsets->size > chan->backend.subbuf_size)) {
1368 /*
1369 * Record too big for subbuffers, report error, don't
1370 * complete the sub-buffer switch.
1371 */
1372 v_inc(config, &buf->records_lost_big);
1373 return -ENOSPC;
1374 } else {
1375 /*
1376 * We just made a successful buffer switch and the
1377 * record fits in the new subbuffer. Let's write.
1378 */
1379 }
1380 } else {
1381 /*
1382 * Record fits in the current buffer and we are not on a switch
1383 * boundary. It's safe to write.
1384 */
1385 }
1386 offsets->end = offsets->begin + offsets->size;
1387
1388 if (unlikely(subbuf_offset(offsets->end, chan) == 0)) {
1389 /*
1390 * The offset_end will fall at the very beginning of the next
1391 * subbuffer.
1392 */
1393 offsets->switch_new_end = 1; /* For offsets->begin */
1394 }
1395 return 0;
1396}
1397
1398/**
1399 * lib_ring_buffer_reserve_slow - Atomic slot reservation in a buffer.
1400 * @ctx: ring buffer context.
1401 *
1402 * Return : -NOBUFS if not enough space, -ENOSPC if event size too large,
1403 * -EIO for other errors, else returns 0.
1404 * It will take care of sub-buffer switching.
1405 */
1406int lib_ring_buffer_reserve_slow(struct lib_ring_buffer_ctx *ctx)
1407{
1408 struct channel *chan = ctx->chan;
1409 const struct lib_ring_buffer_config *config = chan->backend.config;
1410 struct lib_ring_buffer *buf;
1411 struct switch_offsets offsets;
1412 int ret;
1413
1414 if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
a6352fd4 1415 buf = &shmp(chan->backend.buf)[ctx->cpu];
852c2936 1416 else
a6352fd4 1417 buf = shmp(chan->backend.buf);
852c2936
MD
1418 ctx->buf = buf;
1419
1420 offsets.size = 0;
1421
1422 do {
1423 ret = lib_ring_buffer_try_reserve_slow(buf, chan, &offsets,
1424 ctx);
1425 if (unlikely(ret))
1426 return ret;
1427 } while (unlikely(v_cmpxchg(config, &buf->offset, offsets.old,
1428 offsets.end)
1429 != offsets.old));
1430
1431 /*
1432 * Atomically update last_tsc. This update races against concurrent
1433 * atomic updates, but the race will always cause supplementary full TSC
1434 * records, never the opposite (missing a full TSC record when it would
1435 * be needed).
1436 */
1437 save_last_tsc(config, buf, ctx->tsc);
1438
1439 /*
1440 * Push the reader if necessary
1441 */
1442 lib_ring_buffer_reserve_push_reader(buf, chan, offsets.end - 1);
1443
1444 /*
1445 * Clear noref flag for this subbuffer.
1446 */
1447 lib_ring_buffer_clear_noref(config, &buf->backend,
1448 subbuf_index(offsets.end - 1, chan));
1449
1450 /*
1451 * Switch old subbuffer if needed.
1452 */
1453 if (unlikely(offsets.switch_old_end)) {
1454 lib_ring_buffer_clear_noref(config, &buf->backend,
1455 subbuf_index(offsets.old - 1, chan));
1456 lib_ring_buffer_switch_old_end(buf, chan, &offsets, ctx->tsc);
1457 }
1458
1459 /*
1460 * Populate new subbuffer.
1461 */
1462 if (unlikely(offsets.switch_new_start))
1463 lib_ring_buffer_switch_new_start(buf, chan, &offsets, ctx->tsc);
1464
1465 if (unlikely(offsets.switch_new_end))
1466 lib_ring_buffer_switch_new_end(buf, chan, &offsets, ctx->tsc);
1467
1468 ctx->slot_size = offsets.size;
1469 ctx->pre_offset = offsets.begin;
1470 ctx->buf_offset = offsets.begin + offsets.pre_header_padding;
1471 return 0;
1472}
This page took 0.085545 seconds and 4 git commands to generate.