Merge branch 'master' into dev
[lttng-ust.git] / libringbuffer / ring_buffer_frontend.c
CommitLineData
852c2936
MD
1/*
2 * ring_buffer_frontend.c
3 *
4 * (C) Copyright 2005-2010 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
5 *
6 * Ring buffer wait-free buffer synchronization. Producer-consumer and flight
7 * recorder (overwrite) modes. See thesis:
8 *
9 * Desnoyers, Mathieu (2009), "Low-Impact Operating System Tracing", Ph.D.
10 * dissertation, Ecole Polytechnique de Montreal.
11 * http://www.lttng.org/pub/thesis/desnoyers-dissertation-2009-12.pdf
12 *
13 * - Algorithm presentation in Chapter 5:
14 * "Lockless Multi-Core High-Throughput Buffering".
15 * - Algorithm formal verification in Section 8.6:
16 * "Formal verification of LTTng"
17 *
18 * Author:
19 * Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
20 *
21 * Inspired from LTT and RelayFS:
22 * Karim Yaghmour <karim@opersys.com>
23 * Tom Zanussi <zanussi@us.ibm.com>
24 * Bob Wisniewski <bob@watson.ibm.com>
25 * And from K42 :
26 * Bob Wisniewski <bob@watson.ibm.com>
27 *
28 * Buffer reader semantic :
29 *
30 * - get_subbuf_size
31 * while buffer is not finalized and empty
32 * - get_subbuf
33 * - if return value != 0, continue
34 * - splice one subbuffer worth of data to a pipe
35 * - splice the data from pipe to disk/network
36 * - put_subbuf
37 *
38 * Dual LGPL v2.1/GPL v2 license.
39 */
40
5ad63a16 41#define _GNU_SOURCE
a6352fd4 42#include <sys/types.h>
431d5cf0
MD
43#include <sys/mman.h>
44#include <sys/stat.h>
45#include <fcntl.h>
14641deb 46#include <urcu/compiler.h>
a6352fd4 47#include <urcu/ref.h>
35897f8b 48#include <helper.h>
14641deb 49
a6352fd4 50#include "smp.h"
4318ae1b 51#include <lttng/ringbuffer-config.h>
2fed87ae 52#include "vatomic.h"
4931a13e
MD
53#include "backend.h"
54#include "frontend.h"
a6352fd4 55#include "shm.h"
d51652f7 56#include "tlsfixup.h"
bdcf8d82 57#include "../liblttng-ust/compat.h" /* For ENODATA */
852c2936 58
431d5cf0
MD
59#ifndef max
60#define max(a, b) ((a) > (b) ? (a) : (b))
61#endif
62
64493e4f
MD
63/* Print DBG() messages about events lost only every 1048576 hits */
64#define DBG_PRINT_NR_LOST (1UL << 20)
65
2432c3c9
MD
66/*
67 * Use POSIX SHM: shm_open(3) and shm_unlink(3).
68 * close(2) to close the fd returned by shm_open.
69 * shm_unlink releases the shared memory object name.
70 * ftruncate(2) sets the size of the memory object.
71 * mmap/munmap maps the shared memory obj to a virtual address in the
72 * calling process (should be done both in libust and consumer).
73 * See shm_overview(7) for details.
74 * Pass file descriptor returned by shm_open(3) to ltt-sessiond through
75 * a UNIX socket.
76 *
77 * Since we don't need to access the object using its name, we can
78 * immediately shm_unlink(3) it, and only keep the handle with its file
79 * descriptor.
80 */
81
852c2936
MD
82/*
83 * Internal structure representing offsets to use at a sub-buffer switch.
84 */
struct switch_offsets {
	/* Write positions involved in the switch. */
	unsigned long begin;
	unsigned long end;
	unsigned long old;

	/* Padding before the record header, and a size (in bytes). */
	size_t pre_header_padding;
	size_t size;

	/* One-bit flags telling which sub-buffer boundary actions are needed. */
	unsigned int switch_new_start:1;
	unsigned int switch_new_end:1;
	unsigned int switch_old_start:1;
	unsigned int switch_old_end:1;
};
91
/*
 * Thread-local (__thread) unsigned counter, one instance per thread.
 * NOTE(review): presumably the ring buffer nesting depth of the current
 * thread; it is not referenced in the visible part of this file, so
 * confirm against its users.
 */
a6352fd4 92__thread unsigned int lib_ring_buffer_nesting;
852c2936 93
45e9e699
MD
94/*
95 * TODO: this is unused. Errors are saved within the ring buffer.
96 * Eventually, allow consumerd to print these errors.
97 */
852c2936
MD
/* Forward declaration; marked unused so the compiler does not warn. */
static void lib_ring_buffer_print_errors(struct channel *chan,
		struct lttng_ust_lib_ring_buffer *buf,
		int cpu,
		struct lttng_ust_shm_handle *handle) __attribute__((unused));
852c2936 103
852c2936
MD
104/**
105 * lib_ring_buffer_reset - Reset ring buffer to initial values.
106 * @buf: Ring buffer.
107 *
108 * Effectively empty the ring buffer. Should be called when the buffer is not
109 * used for writing. The ring buffer can be opened for reading, but the reader
110 * should not be using the iterator concurrently with reset. The previous
111 * current iterator record is reset.
112 */
4cfec15c 113void lib_ring_buffer_reset(struct lttng_ust_lib_ring_buffer *buf,
38fae1d3 114 struct lttng_ust_shm_handle *handle)
852c2936 115{
1d498196 116 struct channel *chan = shmp(handle, buf->backend.chan);
4cfec15c 117 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
852c2936
MD
118 unsigned int i;
119
120 /*
121 * Reset iterator first. It will put the subbuffer if it currently holds
122 * it.
123 */
852c2936
MD
124 v_set(config, &buf->offset, 0);
125 for (i = 0; i < chan->backend.num_subbuf; i++) {
4746ae29
MD
126 v_set(config, &shmp_index(handle, buf->commit_hot, i)->cc, 0);
127 v_set(config, &shmp_index(handle, buf->commit_hot, i)->seq, 0);
128 v_set(config, &shmp_index(handle, buf->commit_cold, i)->cc_sb, 0);
852c2936 129 }
a6352fd4
MD
130 uatomic_set(&buf->consumed, 0);
131 uatomic_set(&buf->record_disabled, 0);
852c2936 132 v_set(config, &buf->last_tsc, 0);
1d498196 133 lib_ring_buffer_backend_reset(&buf->backend, handle);
852c2936
MD
134 /* Don't reset number of active readers */
135 v_set(config, &buf->records_lost_full, 0);
136 v_set(config, &buf->records_lost_wrap, 0);
137 v_set(config, &buf->records_lost_big, 0);
138 v_set(config, &buf->records_count, 0);
139 v_set(config, &buf->records_overrun, 0);
140 buf->finalized = 0;
141}
852c2936
MD
142
143/**
144 * channel_reset - Reset channel to initial values.
145 * @chan: Channel.
146 *
147 * Effectively empty the channel. Should be called when the channel is not used
148 * for writing. The channel can be opened for reading, but the reader should not
149 * be using the iterator concurrently with reset. The previous current iterator
150 * record is reset.
151 */
152void channel_reset(struct channel *chan)
153{
154 /*
155 * Reset iterators first. Will put the subbuffer if held for reading.
156 */
a6352fd4 157 uatomic_set(&chan->record_disabled, 0);
852c2936
MD
158 /* Don't reset commit_count_mask, still valid */
159 channel_backend_reset(&chan->backend);
160 /* Don't reset switch/read timer interval */
161 /* Don't reset notifiers and notifier enable bits */
162 /* Don't reset reader reference count */
163}
852c2936
MD
164
165/*
166 * Must be called under cpu hotplug protection.
167 */
4cfec15c 168int lib_ring_buffer_create(struct lttng_ust_lib_ring_buffer *buf,
a6352fd4 169 struct channel_backend *chanb, int cpu,
38fae1d3 170 struct lttng_ust_shm_handle *handle,
1d498196 171 struct shm_object *shmobj)
852c2936 172{
4cfec15c 173 const struct lttng_ust_lib_ring_buffer_config *config = &chanb->config;
14641deb 174 struct channel *chan = caa_container_of(chanb, struct channel, backend);
a3f61e7f 175 void *priv = channel_get_private(chan);
852c2936 176 size_t subbuf_header_size;
2fed87ae 177 uint64_t tsc;
852c2936
MD
178 int ret;
179
180 /* Test for cpu hotplug */
181 if (buf->backend.allocated)
182 return 0;
183
a6352fd4 184 ret = lib_ring_buffer_backend_create(&buf->backend, &chan->backend,
1d498196 185 cpu, handle, shmobj);
852c2936
MD
186 if (ret)
187 return ret;
188
1d498196
MD
189 align_shm(shmobj, __alignof__(struct commit_counters_hot));
190 set_shmp(buf->commit_hot,
191 zalloc_shm(shmobj,
192 sizeof(struct commit_counters_hot) * chan->backend.num_subbuf));
193 if (!shmp(handle, buf->commit_hot)) {
852c2936
MD
194 ret = -ENOMEM;
195 goto free_chanbuf;
196 }
197
1d498196
MD
198 align_shm(shmobj, __alignof__(struct commit_counters_cold));
199 set_shmp(buf->commit_cold,
200 zalloc_shm(shmobj,
201 sizeof(struct commit_counters_cold) * chan->backend.num_subbuf));
202 if (!shmp(handle, buf->commit_cold)) {
852c2936
MD
203 ret = -ENOMEM;
204 goto free_commit;
205 }
206
852c2936
MD
207 /*
208 * Write the subbuffer header for first subbuffer so we know the total
209 * duration of data gathering.
210 */
211 subbuf_header_size = config->cb.subbuffer_header_size();
212 v_set(config, &buf->offset, subbuf_header_size);
4746ae29 213 subbuffer_id_clear_noref(config, &shmp_index(handle, buf->backend.buf_wsb, 0)->id);
1d498196
MD
214 tsc = config->cb.ring_buffer_clock_read(shmp(handle, buf->backend.chan));
215 config->cb.buffer_begin(buf, tsc, 0, handle);
4746ae29 216 v_add(config, subbuf_header_size, &shmp_index(handle, buf->commit_hot, 0)->cc);
852c2936
MD
217
218 if (config->cb.buffer_create) {
1d498196 219 ret = config->cb.buffer_create(buf, priv, cpu, chanb->name, handle);
852c2936
MD
220 if (ret)
221 goto free_init;
222 }
852c2936 223 buf->backend.allocated = 1;
852c2936
MD
224 return 0;
225
226 /* Error handling */
227free_init:
a6352fd4 228 /* commit_cold will be freed by shm teardown */
852c2936 229free_commit:
a6352fd4 230 /* commit_hot will be freed by shm teardown */
852c2936 231free_chanbuf:
852c2936
MD
232 return ret;
233}
234
#if 0
/*
 * Disabled: periodic sub-buffer switch callback. Note that `handle` is not
 * in scope here, so this code cannot be enabled as-is.
 */
static void switch_buffer_timer(unsigned long data)
{
	struct lttng_ust_lib_ring_buffer *buf = (struct lttng_ust_lib_ring_buffer *)data;
	struct channel *chan = shmp(handle, buf->backend.chan);
	const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
	int have_reader;

	/* Flush periodically only while some reader (regular or shadow) is attached. */
	have_reader = uatomic_read(&buf->active_readers)
			|| uatomic_read(&buf->active_shadow_readers);
	if (have_reader)
		lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE, handle);

	//TODO timers
	//if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
	//	mod_timer_pinned(&buf->switch_timer,
	//			 jiffies + chan->switch_timer_interval);
	//else
	//	mod_timer(&buf->switch_timer,
	//		  jiffies + chan->switch_timer_interval);
}
#endif //0
852c2936 257
4cfec15c 258static void lib_ring_buffer_start_switch_timer(struct lttng_ust_lib_ring_buffer *buf,
38fae1d3 259 struct lttng_ust_shm_handle *handle)
852c2936 260{
1d498196 261 struct channel *chan = shmp(handle, buf->backend.chan);
4cfec15c 262 //const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
852c2936
MD
263
264 if (!chan->switch_timer_interval || buf->switch_timer_enabled)
265 return;
a6352fd4
MD
266 //TODO
267 //init_timer(&buf->switch_timer);
268 //buf->switch_timer.function = switch_buffer_timer;
269 //buf->switch_timer.expires = jiffies + chan->switch_timer_interval;
270 //buf->switch_timer.data = (unsigned long)buf;
271 //if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
272 // add_timer_on(&buf->switch_timer, buf->backend.cpu);
273 //else
274 // add_timer(&buf->switch_timer);
852c2936
MD
275 buf->switch_timer_enabled = 1;
276}
277
4cfec15c 278static void lib_ring_buffer_stop_switch_timer(struct lttng_ust_lib_ring_buffer *buf,
38fae1d3 279 struct lttng_ust_shm_handle *handle)
852c2936 280{
1d498196 281 struct channel *chan = shmp(handle, buf->backend.chan);
852c2936
MD
282
283 if (!chan->switch_timer_interval || !buf->switch_timer_enabled)
284 return;
285
a6352fd4
MD
286 //TODO
287 //del_timer_sync(&buf->switch_timer);
852c2936
MD
288 buf->switch_timer_enabled = 0;
289}
290
#if 0
/*
 * Disabled: polling timer checking the channel for data to deliver.
 *
 * Kept syntactically valid and consistent with the live
 * lib_ring_buffer_poll_deliver() call in lib_ring_buffer_stop_read_timer().
 * Note that `handle` is not in scope here, so it still needs to be plumbed
 * through before this code can be enabled.
 */
static void read_buffer_timer(unsigned long data)
{
	struct lttng_ust_lib_ring_buffer *buf = (struct lttng_ust_lib_ring_buffer *)data;
	struct channel *chan = shmp(handle, buf->backend.chan);
	const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;

	CHAN_WARN_ON(chan, !buf->backend.allocated);

	/* Poll only while some reader (regular or shadow) is attached. */
	if ((uatomic_read(&buf->active_readers) || uatomic_read(&buf->active_shadow_readers))
	    && lib_ring_buffer_poll_deliver(config, buf, chan, handle)) {
		//TODO
		//wake_up_interruptible(&buf->read_wait);
		//wake_up_interruptible(&chan->read_wait);
	}

	//TODO
	//if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
	//	mod_timer_pinned(&buf->read_timer,
	//			 jiffies + chan->read_timer_interval);
	//else
	//	mod_timer(&buf->read_timer,
	//		  jiffies + chan->read_timer_interval);
}
#endif //0
852c2936 319
4cfec15c 320static void lib_ring_buffer_start_read_timer(struct lttng_ust_lib_ring_buffer *buf,
38fae1d3 321 struct lttng_ust_shm_handle *handle)
852c2936 322{
1d498196 323 struct channel *chan = shmp(handle, buf->backend.chan);
4cfec15c 324 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
852c2936
MD
325
326 if (config->wakeup != RING_BUFFER_WAKEUP_BY_TIMER
327 || !chan->read_timer_interval
328 || buf->read_timer_enabled)
329 return;
330
a6352fd4
MD
331 //TODO
332 //init_timer(&buf->read_timer);
333 //buf->read_timer.function = read_buffer_timer;
334 //buf->read_timer.expires = jiffies + chan->read_timer_interval;
335 //buf->read_timer.data = (unsigned long)buf;
852c2936 336
a6352fd4
MD
337 //if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
338 // add_timer_on(&buf->read_timer, buf->backend.cpu);
339 //else
340 // add_timer(&buf->read_timer);
852c2936
MD
341 buf->read_timer_enabled = 1;
342}
343
4cfec15c 344static void lib_ring_buffer_stop_read_timer(struct lttng_ust_lib_ring_buffer *buf,
38fae1d3 345 struct lttng_ust_shm_handle *handle)
852c2936 346{
1d498196 347 struct channel *chan = shmp(handle, buf->backend.chan);
4cfec15c 348 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
852c2936
MD
349
350 if (config->wakeup != RING_BUFFER_WAKEUP_BY_TIMER
351 || !chan->read_timer_interval
352 || !buf->read_timer_enabled)
353 return;
354
a6352fd4
MD
355 //TODO
356 //del_timer_sync(&buf->read_timer);
852c2936
MD
357 /*
358 * do one more check to catch data that has been written in the last
359 * timer period.
360 */
1d498196 361 if (lib_ring_buffer_poll_deliver(config, buf, chan, handle)) {
a6352fd4
MD
362 //TODO
363 //wake_up_interruptible(&buf->read_wait);
364 //wake_up_interruptible(&chan->read_wait);
852c2936
MD
365 }
366 buf->read_timer_enabled = 0;
367}
368
1d498196 369static void channel_unregister_notifiers(struct channel *chan,
38fae1d3 370 struct lttng_ust_shm_handle *handle)
852c2936 371{
4cfec15c 372 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
852c2936
MD
373 int cpu;
374
852c2936 375 if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
852c2936 376 for_each_possible_cpu(cpu) {
4cfec15c 377 struct lttng_ust_lib_ring_buffer *buf = shmp(handle, chan->backend.buf[cpu].shmp);
a6352fd4 378
1d498196
MD
379 lib_ring_buffer_stop_switch_timer(buf, handle);
380 lib_ring_buffer_stop_read_timer(buf, handle);
852c2936 381 }
852c2936 382 } else {
4cfec15c 383 struct lttng_ust_lib_ring_buffer *buf = shmp(handle, chan->backend.buf[0].shmp);
852c2936 384
1d498196
MD
385 lib_ring_buffer_stop_switch_timer(buf, handle);
386 lib_ring_buffer_stop_read_timer(buf, handle);
852c2936 387 }
8d8a24c8 388 //channel_backend_unregister_notifiers(&chan->backend);
852c2936
MD
389}
390
38fae1d3 391static void channel_free(struct channel *chan, struct lttng_ust_shm_handle *handle,
824f40b8 392 int shadow)
852c2936 393{
824f40b8
MD
394 if (!shadow)
395 channel_backend_free(&chan->backend, handle);
431d5cf0 396 /* chan is freed by shm teardown */
1d498196
MD
397 shm_object_table_destroy(handle->table);
398 free(handle);
852c2936
MD
399}
400
401/**
402 * channel_create - Create channel.
403 * @config: ring buffer instance configuration
404 * @name: name of the channel
a3f61e7f
MD
405 * @priv_data: ring buffer client private data area pointer (output)
406 * @priv_data_size: length, in bytes, of the private data area.
d028eddb 407 * @priv_data_init: initialization data for private data.
852c2936
MD
408 * @buf_addr: pointer to the beginning of the preallocated buffer contiguous
409 * address mapping. It is used only by RING_BUFFER_STATIC
410 * configuration. It can be set to NULL for other backends.
411 * @subbuf_size: subbuffer size
412 * @num_subbuf: number of subbuffers
413 * @switch_timer_interval: Time interval (in us) to fill sub-buffers with
414 * padding to let readers get those sub-buffers.
415 * Used for live streaming.
416 * @read_timer_interval: Time interval (in us) to wake up pending readers.
417 *
418 * Holds cpu hotplug.
419 * Returns NULL on failure.
420 */
4cfec15c 421struct lttng_ust_shm_handle *channel_create(const struct lttng_ust_lib_ring_buffer_config *config,
a3f61e7f
MD
422 const char *name,
423 void **priv_data,
424 size_t priv_data_align,
425 size_t priv_data_size,
d028eddb 426 void *priv_data_init,
a3f61e7f 427 void *buf_addr, size_t subbuf_size,
852c2936 428 size_t num_subbuf, unsigned int switch_timer_interval,
193183fb 429 unsigned int read_timer_interval,
ef9ff354 430 int **shm_fd, int **wait_fd, uint64_t **memory_map_size)
852c2936 431{
1d498196 432 int ret, cpu;
a3f61e7f 433 size_t shmsize, chansize;
852c2936 434 struct channel *chan;
38fae1d3 435 struct lttng_ust_shm_handle *handle;
1d498196 436 struct shm_object *shmobj;
193183fb 437 struct shm_ref *ref;
852c2936
MD
438
439 if (lib_ring_buffer_check_config(config, switch_timer_interval,
440 read_timer_interval))
441 return NULL;
442
38fae1d3 443 handle = zmalloc(sizeof(struct lttng_ust_shm_handle));
431d5cf0
MD
444 if (!handle)
445 return NULL;
446
1d498196
MD
447 /* Allocate table for channel + per-cpu buffers */
448 handle->table = shm_object_table_create(1 + num_possible_cpus());
449 if (!handle->table)
450 goto error_table_alloc;
852c2936 451
1d498196
MD
452 /* Calculate the shm allocation layout */
453 shmsize = sizeof(struct channel);
c1fca457 454 shmsize += offset_align(shmsize, __alignof__(struct lttng_ust_lib_ring_buffer_shmp));
1d498196 455 if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
4cfec15c 456 shmsize += sizeof(struct lttng_ust_lib_ring_buffer_shmp) * num_possible_cpus();
1d498196 457 else
4cfec15c 458 shmsize += sizeof(struct lttng_ust_lib_ring_buffer_shmp);
a3f61e7f
MD
459 chansize = shmsize;
460 shmsize += offset_align(shmsize, priv_data_align);
461 shmsize += priv_data_size;
a6352fd4 462
1d498196 463 shmobj = shm_object_table_append(handle->table, shmsize);
b5a14697
MD
464 if (!shmobj)
465 goto error_append;
57773204 466 /* struct channel is at object 0, offset 0 (hardcoded) */
a3f61e7f 467 set_shmp(handle->chan, zalloc_shm(shmobj, chansize));
57773204
MD
468 assert(handle->chan._ref.index == 0);
469 assert(handle->chan._ref.offset == 0);
1d498196 470 chan = shmp(handle, handle->chan);
a6352fd4 471 if (!chan)
1d498196 472 goto error_append;
a6352fd4 473
a3f61e7f
MD
474 /* space for private data */
475 if (priv_data_size) {
476 DECLARE_SHMP(void, priv_data_alloc);
477
478 align_shm(shmobj, priv_data_align);
479 chan->priv_data_offset = shmobj->allocated_len;
480 set_shmp(priv_data_alloc, zalloc_shm(shmobj, priv_data_size));
481 if (!shmp(handle, priv_data_alloc))
482 goto error_append;
483 *priv_data = channel_get_private(chan);
d028eddb 484 memcpy(*priv_data, priv_data_init, priv_data_size);
a3f61e7f
MD
485 } else {
486 chan->priv_data_offset = -1;
487 *priv_data = NULL;
488 }
489
490 ret = channel_backend_init(&chan->backend, name, config,
1d498196 491 subbuf_size, num_subbuf, handle);
852c2936 492 if (ret)
1d498196 493 goto error_backend_init;
852c2936
MD
494
495 chan->commit_count_mask = (~0UL >> chan->backend.num_subbuf_order);
a6352fd4
MD
496 //TODO
497 //chan->switch_timer_interval = usecs_to_jiffies(switch_timer_interval);
498 //chan->read_timer_interval = usecs_to_jiffies(read_timer_interval);
a6352fd4
MD
499 //TODO
500 //init_waitqueue_head(&chan->read_wait);
501 //init_waitqueue_head(&chan->hp_wait);
852c2936
MD
502
503 if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
852c2936
MD
504 /*
505 * In case of non-hotplug cpu, if the ring-buffer is allocated
506 * in early initcall, it will not be notified of secondary cpus.
507 * In that off case, we need to allocate for all possible cpus.
508 */
852c2936 509 for_each_possible_cpu(cpu) {
4cfec15c 510 struct lttng_ust_lib_ring_buffer *buf = shmp(handle, chan->backend.buf[cpu].shmp);
1d498196
MD
511 lib_ring_buffer_start_switch_timer(buf, handle);
512 lib_ring_buffer_start_read_timer(buf, handle);
852c2936 513 }
852c2936 514 } else {
4cfec15c 515 struct lttng_ust_lib_ring_buffer *buf = shmp(handle, chan->backend.buf[0].shmp);
852c2936 516
1d498196
MD
517 lib_ring_buffer_start_switch_timer(buf, handle);
518 lib_ring_buffer_start_read_timer(buf, handle);
852c2936 519 }
193183fb
MD
520 ref = &handle->chan._ref;
521 shm_get_object_data(handle, ref, shm_fd, wait_fd, memory_map_size);
431d5cf0 522 return handle;
852c2936 523
1d498196
MD
524error_backend_init:
525error_append:
526 shm_object_table_destroy(handle->table);
527error_table_alloc:
431d5cf0 528 free(handle);
852c2936
MD
529 return NULL;
530}
852c2936 531
38fae1d3 532struct lttng_ust_shm_handle *channel_handle_create(int shm_fd, int wait_fd,
193183fb
MD
533 uint64_t memory_map_size)
534{
38fae1d3 535 struct lttng_ust_shm_handle *handle;
193183fb
MD
536 struct shm_object *object;
537
38fae1d3 538 handle = zmalloc(sizeof(struct lttng_ust_shm_handle));
193183fb
MD
539 if (!handle)
540 return NULL;
541
542 /* Allocate table for channel + per-cpu buffers */
543 handle->table = shm_object_table_create(1 + num_possible_cpus());
544 if (!handle->table)
545 goto error_table_alloc;
546 /* Add channel object */
547 object = shm_object_table_append_shadow(handle->table,
548 shm_fd, wait_fd, memory_map_size);
549 if (!object)
550 goto error_table_object;
57773204
MD
551 /* struct channel is at object 0, offset 0 (hardcoded) */
552 handle->chan._ref.index = 0;
553 handle->chan._ref.offset = 0;
193183fb
MD
554 return handle;
555
556error_table_object:
557 shm_object_table_destroy(handle->table);
558error_table_alloc:
559 free(handle);
560 return NULL;
561}
562
38fae1d3 563int channel_handle_add_stream(struct lttng_ust_shm_handle *handle,
193183fb
MD
564 int shm_fd, int wait_fd, uint64_t memory_map_size)
565{
566 struct shm_object *object;
567
568 /* Add stream object */
569 object = shm_object_table_append_shadow(handle->table,
570 shm_fd, wait_fd, memory_map_size);
571 if (!object)
572 return -1;
573 return 0;
574}
575
/* Release the channel: currently a plain forward to channel_free(). */
static void channel_release(struct channel *chan,
		struct lttng_ust_shm_handle *handle, int shadow)
{
	channel_free(chan, handle, shadow);
}
582
583/**
584 * channel_destroy - Finalize, wait for q.s. and destroy channel.
585 * @chan: channel to destroy
586 *
587 * Holds cpu hotplug.
431d5cf0
MD
588 * Call "destroy" callback, finalize channels, decrement the channel
589 * reference count. Note that when readers have completed data
590 * consumption of finalized channels, get_subbuf() will return -ENODATA.
a3f61e7f 591 * They should release their handle at that point.
852c2936 592 */
void channel_destroy(struct channel *chan, struct lttng_ust_shm_handle *handle,
		int shadow)
{
	/*
	 * Timers are stopped for non-shadow handles only; shadow handles go
	 * straight to the release.
	 *
	 * Note: the consumer takes care of finalizing and switching the
	 * buffers.
	 */
	if (!shadow)
		channel_unregister_notifiers(chan, handle);

	/*
	 * sessiond/consumer are keeping a reference on the shm file
	 * descriptor directly. No need to refcount.
	 */
	channel_release(chan, handle, shadow);
}
852c2936 615
4cfec15c
MD
616struct lttng_ust_lib_ring_buffer *channel_get_ring_buffer(
617 const struct lttng_ust_lib_ring_buffer_config *config,
1d498196 618 struct channel *chan, int cpu,
38fae1d3 619 struct lttng_ust_shm_handle *handle,
ef9ff354
MD
620 int **shm_fd, int **wait_fd,
621 uint64_t **memory_map_size)
852c2936 622{
381c0f1e
MD
623 struct shm_ref *ref;
624
625 if (config->alloc == RING_BUFFER_ALLOC_GLOBAL) {
626 ref = &chan->backend.buf[0].shmp._ref;
627 shm_get_object_data(handle, ref, shm_fd, wait_fd,
628 memory_map_size);
1d498196 629 return shmp(handle, chan->backend.buf[0].shmp);
381c0f1e 630 } else {
e095d803
MD
631 if (cpu >= num_possible_cpus())
632 return NULL;
381c0f1e
MD
633 ref = &chan->backend.buf[cpu].shmp._ref;
634 shm_get_object_data(handle, ref, shm_fd, wait_fd,
635 memory_map_size);
1d498196 636 return shmp(handle, chan->backend.buf[cpu].shmp);
381c0f1e 637 }
852c2936 638}
852c2936 639
4cfec15c 640int lib_ring_buffer_open_read(struct lttng_ust_lib_ring_buffer *buf,
38fae1d3 641 struct lttng_ust_shm_handle *handle,
824f40b8 642 int shadow)
852c2936 643{
824f40b8
MD
644 if (shadow) {
645 if (uatomic_cmpxchg(&buf->active_shadow_readers, 0, 1) != 0)
646 return -EBUSY;
647 cmm_smp_mb();
648 return 0;
649 }
a6352fd4 650 if (uatomic_cmpxchg(&buf->active_readers, 0, 1) != 0)
852c2936 651 return -EBUSY;
a6352fd4 652 cmm_smp_mb();
852c2936
MD
653 return 0;
654}
852c2936 655
4cfec15c 656void lib_ring_buffer_release_read(struct lttng_ust_lib_ring_buffer *buf,
38fae1d3 657 struct lttng_ust_shm_handle *handle,
824f40b8 658 int shadow)
852c2936 659{
1d498196 660 struct channel *chan = shmp(handle, buf->backend.chan);
852c2936 661
824f40b8
MD
662 if (shadow) {
663 CHAN_WARN_ON(chan, uatomic_read(&buf->active_shadow_readers) != 1);
664 cmm_smp_mb();
665 uatomic_dec(&buf->active_shadow_readers);
666 return;
667 }
a6352fd4
MD
668 CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1);
669 cmm_smp_mb();
670 uatomic_dec(&buf->active_readers);
852c2936
MD
671}
672
673/**
674 * lib_ring_buffer_snapshot - save subbuffer position snapshot (for read)
675 * @buf: ring buffer
676 * @consumed: consumed count indicating the position where to read
677 * @produced: produced count, indicates position when to stop reading
678 *
679 * Returns -ENODATA if buffer is finalized, -EAGAIN if there is currently no
680 * data to read at consumed position, or 0 if the get operation succeeds.
852c2936
MD
681 */
682
4cfec15c 683int lib_ring_buffer_snapshot(struct lttng_ust_lib_ring_buffer *buf,
1d498196 684 unsigned long *consumed, unsigned long *produced,
38fae1d3 685 struct lttng_ust_shm_handle *handle)
852c2936 686{
1d498196 687 struct channel *chan = shmp(handle, buf->backend.chan);
4cfec15c 688 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
852c2936
MD
689 unsigned long consumed_cur, write_offset;
690 int finalized;
691
14641deb 692 finalized = CMM_ACCESS_ONCE(buf->finalized);
852c2936
MD
693 /*
694 * Read finalized before counters.
695 */
a6352fd4
MD
696 cmm_smp_rmb();
697 consumed_cur = uatomic_read(&buf->consumed);
852c2936
MD
698 /*
699 * No need to issue a memory barrier between consumed count read and
700 * write offset read, because consumed count can only change
701 * concurrently in overwrite mode, and we keep a sequence counter
702 * identifier derived from the write offset to check we are getting
703 * the same sub-buffer we are expecting (the sub-buffers are atomically
704 * "tagged" upon writes, tags are checked upon read).
705 */
706 write_offset = v_read(config, &buf->offset);
707
708 /*
709 * Check that we are not about to read the same subbuffer in
710 * which the writer head is.
711 */
712 if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed_cur, chan)
713 == 0)
714 goto nodata;
715
716 *consumed = consumed_cur;
717 *produced = subbuf_trunc(write_offset, chan);
718
719 return 0;
720
721nodata:
722 /*
723 * The memory barriers __wait_event()/wake_up_interruptible() take care
724 * of "raw_spin_is_locked" memory ordering.
725 */
726 if (finalized)
727 return -ENODATA;
852c2936
MD
728 else
729 return -EAGAIN;
730}
852c2936
MD
731
732/**
733 * lib_ring_buffer_put_snapshot - move consumed counter forward
734 * @buf: ring buffer
735 * @consumed_new: new consumed count value
736 */
4cfec15c 737void lib_ring_buffer_move_consumer(struct lttng_ust_lib_ring_buffer *buf,
1d498196 738 unsigned long consumed_new,
38fae1d3 739 struct lttng_ust_shm_handle *handle)
852c2936 740{
4cfec15c 741 struct lttng_ust_lib_ring_buffer_backend *bufb = &buf->backend;
1d498196 742 struct channel *chan = shmp(handle, bufb->chan);
852c2936
MD
743 unsigned long consumed;
744
824f40b8
MD
745 CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1
746 && uatomic_read(&buf->active_shadow_readers) != 1);
852c2936
MD
747
748 /*
749 * Only push the consumed value forward.
750 * If the consumed cmpxchg fails, this is because we have been pushed by
751 * the writer in flight recorder mode.
752 */
a6352fd4 753 consumed = uatomic_read(&buf->consumed);
852c2936 754 while ((long) consumed - (long) consumed_new < 0)
a6352fd4
MD
755 consumed = uatomic_cmpxchg(&buf->consumed, consumed,
756 consumed_new);
852c2936 757}
852c2936
MD
758
759/**
760 * lib_ring_buffer_get_subbuf - get exclusive access to subbuffer for reading
761 * @buf: ring buffer
762 * @consumed: consumed count indicating the position where to read
763 *
764 * Returns -ENODATA if buffer is finalized, -EAGAIN if there is currently no
765 * data to read at consumed position, or 0 if the get operation succeeds.
852c2936 766 */
4cfec15c 767int lib_ring_buffer_get_subbuf(struct lttng_ust_lib_ring_buffer *buf,
1d498196 768 unsigned long consumed,
38fae1d3 769 struct lttng_ust_shm_handle *handle)
852c2936 770{
1d498196 771 struct channel *chan = shmp(handle, buf->backend.chan);
4cfec15c 772 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
852c2936
MD
773 unsigned long consumed_cur, consumed_idx, commit_count, write_offset;
774 int ret;
775 int finalized;
776
777retry:
14641deb 778 finalized = CMM_ACCESS_ONCE(buf->finalized);
852c2936
MD
779 /*
780 * Read finalized before counters.
781 */
a6352fd4
MD
782 cmm_smp_rmb();
783 consumed_cur = uatomic_read(&buf->consumed);
852c2936 784 consumed_idx = subbuf_index(consumed, chan);
4746ae29 785 commit_count = v_read(config, &shmp_index(handle, buf->commit_cold, consumed_idx)->cc_sb);
852c2936
MD
786 /*
787 * Make sure we read the commit count before reading the buffer
788 * data and the write offset. Correct consumed offset ordering
789 * wrt commit count is insured by the use of cmpxchg to update
790 * the consumed offset.
852c2936 791 */
a6352fd4
MD
792 /*
793 * Local rmb to match the remote wmb to read the commit count
794 * before the buffer data and the write offset.
795 */
796 cmm_smp_rmb();
852c2936
MD
797
798 write_offset = v_read(config, &buf->offset);
799
800 /*
801 * Check that the buffer we are getting is after or at consumed_cur
802 * position.
803 */
804 if ((long) subbuf_trunc(consumed, chan)
805 - (long) subbuf_trunc(consumed_cur, chan) < 0)
806 goto nodata;
807
808 /*
809 * Check that the subbuffer we are trying to consume has been
810 * already fully committed.
811 */
812 if (((commit_count - chan->backend.subbuf_size)
813 & chan->commit_count_mask)
814 - (buf_trunc(consumed_cur, chan)
815 >> chan->backend.num_subbuf_order)
816 != 0)
817 goto nodata;
818
819 /*
820 * Check that we are not about to read the same subbuffer in
821 * which the writer head is.
822 */
823 if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed_cur, chan)
824 == 0)
825 goto nodata;
826
827 /*
828 * Failure to get the subbuffer causes a busy-loop retry without going
829 * to a wait queue. These are caused by short-lived race windows where
830 * the writer is getting access to a subbuffer we were trying to get
831 * access to. Also checks that the "consumed" buffer count we are
832 * looking for matches the one contained in the subbuffer id.
833 */
834 ret = update_read_sb_index(config, &buf->backend, &chan->backend,
1d498196
MD
835 consumed_idx, buf_trunc_val(consumed, chan),
836 handle);
852c2936
MD
837 if (ret)
838 goto retry;
839 subbuffer_id_clear_noref(config, &buf->backend.buf_rsb.id);
840
841 buf->get_subbuf_consumed = consumed;
842 buf->get_subbuf = 1;
843
844 return 0;
845
846nodata:
847 /*
848 * The memory barriers __wait_event()/wake_up_interruptible() take care
849 * of "raw_spin_is_locked" memory ordering.
850 */
851 if (finalized)
852 return -ENODATA;
852c2936
MD
853 else
854 return -EAGAIN;
855}
852c2936
MD
856
857/**
858 * lib_ring_buffer_put_subbuf - release exclusive subbuffer access
859 * @buf: ring buffer
860 */
4cfec15c 861void lib_ring_buffer_put_subbuf(struct lttng_ust_lib_ring_buffer *buf,
38fae1d3 862 struct lttng_ust_shm_handle *handle)
852c2936 863{
4cfec15c 864 struct lttng_ust_lib_ring_buffer_backend *bufb = &buf->backend;
1d498196 865 struct channel *chan = shmp(handle, bufb->chan);
4cfec15c 866 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
852c2936
MD
867 unsigned long read_sb_bindex, consumed_idx, consumed;
868
824f40b8
MD
869 CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1
870 && uatomic_read(&buf->active_shadow_readers) != 1);
852c2936
MD
871
872 if (!buf->get_subbuf) {
873 /*
874 * Reader puts a subbuffer it did not get.
875 */
876 CHAN_WARN_ON(chan, 1);
877 return;
878 }
879 consumed = buf->get_subbuf_consumed;
880 buf->get_subbuf = 0;
881
882 /*
883 * Clear the records_unread counter. (overruns counter)
884 * Can still be non-zero if a file reader simply grabbed the data
885 * without using iterators.
886 * Can be below zero if an iterator is used on a snapshot more than
887 * once.
888 */
889 read_sb_bindex = subbuffer_id_get_index(config, bufb->buf_rsb.id);
890 v_add(config, v_read(config,
4746ae29 891 &shmp(handle, shmp_index(handle, bufb->array, read_sb_bindex)->shmp)->records_unread),
852c2936 892 &bufb->records_read);
4746ae29 893 v_set(config, &shmp(handle, shmp_index(handle, bufb->array, read_sb_bindex)->shmp)->records_unread, 0);
852c2936
MD
894 CHAN_WARN_ON(chan, config->mode == RING_BUFFER_OVERWRITE
895 && subbuffer_id_is_noref(config, bufb->buf_rsb.id));
896 subbuffer_id_set_noref(config, &bufb->buf_rsb.id);
897
898 /*
899 * Exchange the reader subbuffer with the one we put in its place in the
900 * writer subbuffer table. Expect the original consumed count. If
901 * update_read_sb_index fails, this is because the writer updated the
902 * subbuffer concurrently. We should therefore keep the subbuffer we
903 * currently have: it has become invalid to try reading this sub-buffer
904 * consumed count value anyway.
905 */
906 consumed_idx = subbuf_index(consumed, chan);
907 update_read_sb_index(config, &buf->backend, &chan->backend,
1d498196
MD
908 consumed_idx, buf_trunc_val(consumed, chan),
909 handle);
852c2936
MD
910 /*
911 * update_read_sb_index return value ignored. Don't exchange sub-buffer
912 * if the writer concurrently updated it.
913 */
914}
852c2936
MD
915
916/*
917 * cons_offset is an iterator on all subbuffer offsets between the reader
918 * position and the writer position. (inclusive)
919 */
920static
4cfec15c 921void lib_ring_buffer_print_subbuffer_errors(struct lttng_ust_lib_ring_buffer *buf,
852c2936
MD
922 struct channel *chan,
923 unsigned long cons_offset,
1d498196 924 int cpu,
38fae1d3 925 struct lttng_ust_shm_handle *handle)
852c2936 926{
4cfec15c 927 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
852c2936
MD
928 unsigned long cons_idx, commit_count, commit_count_sb;
929
930 cons_idx = subbuf_index(cons_offset, chan);
4746ae29
MD
931 commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, cons_idx)->cc);
932 commit_count_sb = v_read(config, &shmp_index(handle, buf->commit_cold, cons_idx)->cc_sb);
852c2936
MD
933
934 if (subbuf_offset(commit_count, chan) != 0)
4d3c9523 935 DBG("ring buffer %s, cpu %d: "
852c2936
MD
936 "commit count in subbuffer %lu,\n"
937 "expecting multiples of %lu bytes\n"
938 " [ %lu bytes committed, %lu bytes reader-visible ]\n",
939 chan->backend.name, cpu, cons_idx,
940 chan->backend.subbuf_size,
941 commit_count, commit_count_sb);
942
4d3c9523 943 DBG("ring buffer: %s, cpu %d: %lu bytes committed\n",
852c2936
MD
944 chan->backend.name, cpu, commit_count);
945}
946
947static
4cfec15c 948void lib_ring_buffer_print_buffer_errors(struct lttng_ust_lib_ring_buffer *buf,
852c2936 949 struct channel *chan,
1d498196 950 void *priv, int cpu,
38fae1d3 951 struct lttng_ust_shm_handle *handle)
852c2936 952{
4cfec15c 953 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
852c2936
MD
954 unsigned long write_offset, cons_offset;
955
852c2936
MD
956 /*
957 * No need to order commit_count, write_offset and cons_offset reads
958 * because we execute at teardown when no more writer nor reader
959 * references are left.
960 */
961 write_offset = v_read(config, &buf->offset);
a6352fd4 962 cons_offset = uatomic_read(&buf->consumed);
852c2936 963 if (write_offset != cons_offset)
4d3c9523 964 DBG("ring buffer %s, cpu %d: "
852c2936
MD
965 "non-consumed data\n"
966 " [ %lu bytes written, %lu bytes read ]\n",
967 chan->backend.name, cpu, write_offset, cons_offset);
968
a6352fd4 969 for (cons_offset = uatomic_read(&buf->consumed);
852c2936
MD
970 (long) (subbuf_trunc((unsigned long) v_read(config, &buf->offset),
971 chan)
972 - cons_offset) > 0;
973 cons_offset = subbuf_align(cons_offset, chan))
974 lib_ring_buffer_print_subbuffer_errors(buf, chan, cons_offset,
1d498196 975 cpu, handle);
852c2936
MD
976}
977
978static
979void lib_ring_buffer_print_errors(struct channel *chan,
4cfec15c 980 struct lttng_ust_lib_ring_buffer *buf, int cpu,
38fae1d3 981 struct lttng_ust_shm_handle *handle)
852c2936 982{
4cfec15c 983 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
a3f61e7f 984 void *priv = channel_get_private(chan);
852c2936 985
a1360615
MD
986 if (!strcmp(chan->backend.name, "relay-metadata-mmap")) {
987 DBG("ring buffer %s: %lu records written, "
988 "%lu records overrun\n",
989 chan->backend.name,
990 v_read(config, &buf->records_count),
991 v_read(config, &buf->records_overrun));
992 } else {
993 DBG("ring buffer %s, cpu %d: %lu records written, "
994 "%lu records overrun\n",
995 chan->backend.name, cpu,
996 v_read(config, &buf->records_count),
997 v_read(config, &buf->records_overrun));
998
999 if (v_read(config, &buf->records_lost_full)
1000 || v_read(config, &buf->records_lost_wrap)
1001 || v_read(config, &buf->records_lost_big))
1002 DBG("ring buffer %s, cpu %d: records were lost. Caused by:\n"
1003 " [ %lu buffer full, %lu nest buffer wrap-around, "
1004 "%lu event too big ]\n",
1005 chan->backend.name, cpu,
1006 v_read(config, &buf->records_lost_full),
1007 v_read(config, &buf->records_lost_wrap),
1008 v_read(config, &buf->records_lost_big));
1009 }
1d498196 1010 lib_ring_buffer_print_buffer_errors(buf, chan, priv, cpu, handle);
852c2936
MD
1011}
1012
1013/*
1014 * lib_ring_buffer_switch_old_start: Populate old subbuffer header.
1015 *
1016 * Only executed when the buffer is finalized, in SWITCH_FLUSH.
1017 */
1018static
4cfec15c 1019void lib_ring_buffer_switch_old_start(struct lttng_ust_lib_ring_buffer *buf,
852c2936
MD
1020 struct channel *chan,
1021 struct switch_offsets *offsets,
2fed87ae 1022 uint64_t tsc,
38fae1d3 1023 struct lttng_ust_shm_handle *handle)
852c2936 1024{
4cfec15c 1025 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
852c2936
MD
1026 unsigned long oldidx = subbuf_index(offsets->old, chan);
1027 unsigned long commit_count;
1028
1d498196 1029 config->cb.buffer_begin(buf, tsc, oldidx, handle);
852c2936
MD
1030
1031 /*
1032 * Order all writes to buffer before the commit count update that will
1033 * determine that the subbuffer is full.
1034 */
a6352fd4 1035 cmm_smp_wmb();
852c2936 1036 v_add(config, config->cb.subbuffer_header_size(),
4746ae29
MD
1037 &shmp_index(handle, buf->commit_hot, oldidx)->cc);
1038 commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, oldidx)->cc);
852c2936
MD
1039 /* Check if the written buffer has to be delivered */
1040 lib_ring_buffer_check_deliver(config, buf, chan, offsets->old,
1d498196 1041 commit_count, oldidx, handle);
852c2936
MD
1042 lib_ring_buffer_write_commit_counter(config, buf, chan, oldidx,
1043 offsets->old, commit_count,
1d498196
MD
1044 config->cb.subbuffer_header_size(),
1045 handle);
852c2936
MD
1046}
1047
1048/*
1049 * lib_ring_buffer_switch_old_end: switch old subbuffer
1050 *
1051 * Note : offset_old should never be 0 here. It is ok, because we never perform
1052 * buffer switch on an empty subbuffer in SWITCH_ACTIVE mode. The caller
1053 * increments the offset_old value when doing a SWITCH_FLUSH on an empty
1054 * subbuffer.
1055 */
1056static
4cfec15c 1057void lib_ring_buffer_switch_old_end(struct lttng_ust_lib_ring_buffer *buf,
852c2936
MD
1058 struct channel *chan,
1059 struct switch_offsets *offsets,
2fed87ae 1060 uint64_t tsc,
38fae1d3 1061 struct lttng_ust_shm_handle *handle)
852c2936 1062{
4cfec15c 1063 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
852c2936
MD
1064 unsigned long oldidx = subbuf_index(offsets->old - 1, chan);
1065 unsigned long commit_count, padding_size, data_size;
1066
1067 data_size = subbuf_offset(offsets->old - 1, chan) + 1;
1068 padding_size = chan->backend.subbuf_size - data_size;
1d498196
MD
1069 subbuffer_set_data_size(config, &buf->backend, oldidx, data_size,
1070 handle);
852c2936
MD
1071
1072 /*
1073 * Order all writes to buffer before the commit count update that will
1074 * determine that the subbuffer is full.
1075 */
a6352fd4 1076 cmm_smp_wmb();
4746ae29
MD
1077 v_add(config, padding_size, &shmp_index(handle, buf->commit_hot, oldidx)->cc);
1078 commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, oldidx)->cc);
852c2936 1079 lib_ring_buffer_check_deliver(config, buf, chan, offsets->old - 1,
1d498196 1080 commit_count, oldidx, handle);
852c2936
MD
1081 lib_ring_buffer_write_commit_counter(config, buf, chan, oldidx,
1082 offsets->old, commit_count,
1d498196 1083 padding_size, handle);
852c2936
MD
1084}
1085
1086/*
1087 * lib_ring_buffer_switch_new_start: Populate new subbuffer.
1088 *
1089 * This code can be executed unordered : writers may already have written to the
1090 * sub-buffer before this code gets executed, caution. The commit makes sure
1091 * that this code is executed before the deliver of this sub-buffer.
1092 */
1093static
4cfec15c 1094void lib_ring_buffer_switch_new_start(struct lttng_ust_lib_ring_buffer *buf,
852c2936
MD
1095 struct channel *chan,
1096 struct switch_offsets *offsets,
2fed87ae 1097 uint64_t tsc,
38fae1d3 1098 struct lttng_ust_shm_handle *handle)
852c2936 1099{
4cfec15c 1100 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
852c2936
MD
1101 unsigned long beginidx = subbuf_index(offsets->begin, chan);
1102 unsigned long commit_count;
1103
1d498196 1104 config->cb.buffer_begin(buf, tsc, beginidx, handle);
852c2936
MD
1105
1106 /*
1107 * Order all writes to buffer before the commit count update that will
1108 * determine that the subbuffer is full.
1109 */
a6352fd4 1110 cmm_smp_wmb();
852c2936 1111 v_add(config, config->cb.subbuffer_header_size(),
4746ae29
MD
1112 &shmp_index(handle, buf->commit_hot, beginidx)->cc);
1113 commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, beginidx)->cc);
852c2936
MD
1114 /* Check if the written buffer has to be delivered */
1115 lib_ring_buffer_check_deliver(config, buf, chan, offsets->begin,
1d498196 1116 commit_count, beginidx, handle);
852c2936
MD
1117 lib_ring_buffer_write_commit_counter(config, buf, chan, beginidx,
1118 offsets->begin, commit_count,
1d498196
MD
1119 config->cb.subbuffer_header_size(),
1120 handle);
852c2936
MD
1121}
1122
1123/*
1124 * lib_ring_buffer_switch_new_end: finish switching current subbuffer
1125 *
1126 * The only remaining threads could be the ones with pending commits. They will
1127 * have to do the deliver themselves.
1128 */
1129static
4cfec15c 1130void lib_ring_buffer_switch_new_end(struct lttng_ust_lib_ring_buffer *buf,
1d498196
MD
1131 struct channel *chan,
1132 struct switch_offsets *offsets,
2fed87ae 1133 uint64_t tsc,
38fae1d3 1134 struct lttng_ust_shm_handle *handle)
852c2936 1135{
4cfec15c 1136 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
852c2936
MD
1137 unsigned long endidx = subbuf_index(offsets->end - 1, chan);
1138 unsigned long commit_count, padding_size, data_size;
1139
1140 data_size = subbuf_offset(offsets->end - 1, chan) + 1;
1141 padding_size = chan->backend.subbuf_size - data_size;
1d498196
MD
1142 subbuffer_set_data_size(config, &buf->backend, endidx, data_size,
1143 handle);
852c2936
MD
1144
1145 /*
1146 * Order all writes to buffer before the commit count update that will
1147 * determine that the subbuffer is full.
1148 */
a6352fd4 1149 cmm_smp_wmb();
4746ae29
MD
1150 v_add(config, padding_size, &shmp_index(handle, buf->commit_hot, endidx)->cc);
1151 commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, endidx)->cc);
852c2936 1152 lib_ring_buffer_check_deliver(config, buf, chan, offsets->end - 1,
1d498196 1153 commit_count, endidx, handle);
852c2936
MD
1154 lib_ring_buffer_write_commit_counter(config, buf, chan, endidx,
1155 offsets->end, commit_count,
1d498196 1156 padding_size, handle);
852c2936
MD
1157}
1158
1159/*
1160 * Returns :
1161 * 0 if ok
1162 * !0 if execution must be aborted.
1163 */
1164static
1165int lib_ring_buffer_try_switch_slow(enum switch_mode mode,
4cfec15c 1166 struct lttng_ust_lib_ring_buffer *buf,
852c2936
MD
1167 struct channel *chan,
1168 struct switch_offsets *offsets,
2fed87ae 1169 uint64_t *tsc)
852c2936 1170{
4cfec15c 1171 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
852c2936
MD
1172 unsigned long off;
1173
1174 offsets->begin = v_read(config, &buf->offset);
1175 offsets->old = offsets->begin;
1176 offsets->switch_old_start = 0;
1177 off = subbuf_offset(offsets->begin, chan);
1178
1179 *tsc = config->cb.ring_buffer_clock_read(chan);
1180
1181 /*
1182 * Ensure we flush the header of an empty subbuffer when doing the
1183 * finalize (SWITCH_FLUSH). This ensures that we end up knowing the
1184 * total data gathering duration even if there were no records saved
1185 * after the last buffer switch.
1186 * In SWITCH_ACTIVE mode, switch the buffer when it contains events.
1187 * SWITCH_ACTIVE only flushes the current subbuffer, dealing with end of
1188 * subbuffer header as appropriate.
1189 * The next record that reserves space will be responsible for
1190 * populating the following subbuffer header. We choose not to populate
1191 * the next subbuffer header here because we want to be able to use
a6352fd4
MD
1192 * SWITCH_ACTIVE for periodical buffer flush, which must
1193 * guarantee that all the buffer content (records and header
1194 * timestamps) are visible to the reader. This is required for
1195 * quiescence guarantees for the fusion merge.
852c2936
MD
1196 */
1197 if (mode == SWITCH_FLUSH || off > 0) {
b5a3dfa5 1198 if (caa_unlikely(off == 0)) {
852c2936
MD
1199 /*
1200 * The client does not save any header information.
1201 * Don't switch empty subbuffer on finalize, because it
1202 * is invalid to deliver a completely empty subbuffer.
1203 */
1204 if (!config->cb.subbuffer_header_size())
1205 return -1;
1206 /*
1207 * Need to write the subbuffer start header on finalize.
1208 */
1209 offsets->switch_old_start = 1;
1210 }
1211 offsets->begin = subbuf_align(offsets->begin, chan);
1212 } else
1213 return -1; /* we do not have to switch : buffer is empty */
1214 /* Note: old points to the next subbuf at offset 0 */
1215 offsets->end = offsets->begin;
1216 return 0;
1217}
1218
1219/*
1220 * Force a sub-buffer switch. This operation is completely reentrant : can be
1221 * called while tracing is active with absolutely no lock held.
1222 *
1223 * Note, however, that as a v_cmpxchg is used for some atomic
1224 * operations, this function must be called from the CPU which owns the buffer
1225 * for a ACTIVE flush.
1226 */
4cfec15c 1227void lib_ring_buffer_switch_slow(struct lttng_ust_lib_ring_buffer *buf, enum switch_mode mode,
38fae1d3 1228 struct lttng_ust_shm_handle *handle)
852c2936 1229{
1d498196 1230 struct channel *chan = shmp(handle, buf->backend.chan);
4cfec15c 1231 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
852c2936
MD
1232 struct switch_offsets offsets;
1233 unsigned long oldidx;
2fed87ae 1234 uint64_t tsc;
852c2936
MD
1235
1236 offsets.size = 0;
1237
1238 /*
1239 * Perform retryable operations.
1240 */
1241 do {
1242 if (lib_ring_buffer_try_switch_slow(mode, buf, chan, &offsets,
1243 &tsc))
1244 return; /* Switch not needed */
1245 } while (v_cmpxchg(config, &buf->offset, offsets.old, offsets.end)
1246 != offsets.old);
1247
1248 /*
1249 * Atomically update last_tsc. This update races against concurrent
1250 * atomic updates, but the race will always cause supplementary full TSC
1251 * records, never the opposite (missing a full TSC record when it would
1252 * be needed).
1253 */
1254 save_last_tsc(config, buf, tsc);
1255
1256 /*
1257 * Push the reader if necessary
1258 */
1259 lib_ring_buffer_reserve_push_reader(buf, chan, offsets.old);
1260
1261 oldidx = subbuf_index(offsets.old, chan);
1d498196 1262 lib_ring_buffer_clear_noref(config, &buf->backend, oldidx, handle);
852c2936
MD
1263
1264 /*
1265 * May need to populate header start on SWITCH_FLUSH.
1266 */
1267 if (offsets.switch_old_start) {
1d498196 1268 lib_ring_buffer_switch_old_start(buf, chan, &offsets, tsc, handle);
852c2936
MD
1269 offsets.old += config->cb.subbuffer_header_size();
1270 }
1271
1272 /*
1273 * Switch old subbuffer.
1274 */
1d498196 1275 lib_ring_buffer_switch_old_end(buf, chan, &offsets, tsc, handle);
852c2936 1276}
852c2936
MD
1277
1278/*
1279 * Returns :
1280 * 0 if ok
1281 * -ENOSPC if event size is too large for packet.
1282 * -ENOBUFS if there is currently not enough space in buffer for the event.
1283 * -EIO if data cannot be written into the buffer for any other reason.
1284 */
1285static
4cfec15c 1286int lib_ring_buffer_try_reserve_slow(struct lttng_ust_lib_ring_buffer *buf,
852c2936
MD
1287 struct channel *chan,
1288 struct switch_offsets *offsets,
4cfec15c 1289 struct lttng_ust_lib_ring_buffer_ctx *ctx)
852c2936 1290{
4cfec15c 1291 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
38fae1d3 1292 struct lttng_ust_shm_handle *handle = ctx->handle;
852c2936
MD
1293 unsigned long reserve_commit_diff;
1294
1295 offsets->begin = v_read(config, &buf->offset);
1296 offsets->old = offsets->begin;
1297 offsets->switch_new_start = 0;
1298 offsets->switch_new_end = 0;
1299 offsets->switch_old_end = 0;
1300 offsets->pre_header_padding = 0;
1301
1302 ctx->tsc = config->cb.ring_buffer_clock_read(chan);
1303 if ((int64_t) ctx->tsc == -EIO)
1304 return -EIO;
1305
1306 if (last_tsc_overflow(config, buf, ctx->tsc))
1307 ctx->rflags |= RING_BUFFER_RFLAG_FULL_TSC;
1308
b5a3dfa5 1309 if (caa_unlikely(subbuf_offset(offsets->begin, ctx->chan) == 0)) {
852c2936
MD
1310 offsets->switch_new_start = 1; /* For offsets->begin */
1311 } else {
1312 offsets->size = config->cb.record_header_size(config, chan,
1313 offsets->begin,
1314 &offsets->pre_header_padding,
1315 ctx);
1316 offsets->size +=
1317 lib_ring_buffer_align(offsets->begin + offsets->size,
1318 ctx->largest_align)
1319 + ctx->data_size;
b5a3dfa5 1320 if (caa_unlikely(subbuf_offset(offsets->begin, chan) +
852c2936
MD
1321 offsets->size > chan->backend.subbuf_size)) {
1322 offsets->switch_old_end = 1; /* For offsets->old */
1323 offsets->switch_new_start = 1; /* For offsets->begin */
1324 }
1325 }
b5a3dfa5 1326 if (caa_unlikely(offsets->switch_new_start)) {
852c2936
MD
1327 unsigned long sb_index;
1328
1329 /*
1330 * We are typically not filling the previous buffer completely.
1331 */
b5a3dfa5 1332 if (caa_likely(offsets->switch_old_end))
852c2936
MD
1333 offsets->begin = subbuf_align(offsets->begin, chan);
1334 offsets->begin = offsets->begin
1335 + config->cb.subbuffer_header_size();
1336 /* Test new buffer integrity */
1337 sb_index = subbuf_index(offsets->begin, chan);
1338 reserve_commit_diff =
1339 (buf_trunc(offsets->begin, chan)
1340 >> chan->backend.num_subbuf_order)
1341 - ((unsigned long) v_read(config,
4746ae29 1342 &shmp_index(handle, buf->commit_cold, sb_index)->cc_sb)
852c2936 1343 & chan->commit_count_mask);
b5a3dfa5 1344 if (caa_likely(reserve_commit_diff == 0)) {
852c2936 1345 /* Next subbuffer not being written to. */
b5a3dfa5 1346 if (caa_unlikely(config->mode != RING_BUFFER_OVERWRITE &&
852c2936
MD
1347 subbuf_trunc(offsets->begin, chan)
1348 - subbuf_trunc((unsigned long)
a6352fd4 1349 uatomic_read(&buf->consumed), chan)
852c2936 1350 >= chan->backend.buf_size)) {
64493e4f
MD
1351 unsigned long nr_lost;
1352
852c2936
MD
1353 /*
1354 * We do not overwrite non consumed buffers
1355 * and we are full : record is lost.
1356 */
64493e4f 1357 nr_lost = v_read(config, &buf->records_lost_full);
852c2936 1358 v_inc(config, &buf->records_lost_full);
64493e4f
MD
1359 if ((nr_lost & (DBG_PRINT_NR_LOST - 1)) == 0) {
1360 DBG("%lu or more records lost in (%s:%d) (buffer full)\n",
1361 nr_lost + 1, chan->backend.name,
1362 buf->backend.cpu);
1363 }
852c2936
MD
1364 return -ENOBUFS;
1365 } else {
1366 /*
1367 * Next subbuffer not being written to, and we
1368 * are either in overwrite mode or the buffer is
1369 * not full. It's safe to write in this new
1370 * subbuffer.
1371 */
1372 }
1373 } else {
64493e4f
MD
1374 unsigned long nr_lost;
1375
852c2936
MD
1376 /*
1377 * Next subbuffer reserve offset does not match the
1378 * commit offset. Drop record in producer-consumer and
1379 * overwrite mode. Caused by either a writer OOPS or too
1380 * many nested writes over a reserve/commit pair.
1381 */
64493e4f 1382 nr_lost = v_read(config, &buf->records_lost_wrap);
852c2936 1383 v_inc(config, &buf->records_lost_wrap);
64493e4f
MD
1384 if ((nr_lost & (DBG_PRINT_NR_LOST - 1)) == 0) {
1385 DBG("%lu or more records lost in (%s:%d) (wrap-around)\n",
1386 nr_lost + 1, chan->backend.name,
1387 buf->backend.cpu);
1388 }
852c2936
MD
1389 return -EIO;
1390 }
1391 offsets->size =
1392 config->cb.record_header_size(config, chan,
1393 offsets->begin,
1394 &offsets->pre_header_padding,
1395 ctx);
1396 offsets->size +=
1397 lib_ring_buffer_align(offsets->begin + offsets->size,
1398 ctx->largest_align)
1399 + ctx->data_size;
b5a3dfa5 1400 if (caa_unlikely(subbuf_offset(offsets->begin, chan)
852c2936 1401 + offsets->size > chan->backend.subbuf_size)) {
64493e4f
MD
1402 unsigned long nr_lost;
1403
852c2936
MD
1404 /*
1405 * Record too big for subbuffers, report error, don't
1406 * complete the sub-buffer switch.
1407 */
64493e4f 1408 nr_lost = v_read(config, &buf->records_lost_big);
852c2936 1409 v_inc(config, &buf->records_lost_big);
64493e4f
MD
1410 if ((nr_lost & (DBG_PRINT_NR_LOST - 1)) == 0) {
1411 DBG("%lu or more records lost in (%s:%d) record size "
1412 " of %zu bytes is too large for buffer\n",
1413 nr_lost + 1, chan->backend.name,
1414 buf->backend.cpu, offsets->size);
1415 }
852c2936
MD
1416 return -ENOSPC;
1417 } else {
1418 /*
1419 * We just made a successful buffer switch and the
1420 * record fits in the new subbuffer. Let's write.
1421 */
1422 }
1423 } else {
1424 /*
1425 * Record fits in the current buffer and we are not on a switch
1426 * boundary. It's safe to write.
1427 */
1428 }
1429 offsets->end = offsets->begin + offsets->size;
1430
b5a3dfa5 1431 if (caa_unlikely(subbuf_offset(offsets->end, chan) == 0)) {
852c2936
MD
1432 /*
1433 * The offset_end will fall at the very beginning of the next
1434 * subbuffer.
1435 */
1436 offsets->switch_new_end = 1; /* For offsets->begin */
1437 }
1438 return 0;
1439}
1440
1441/**
1442 * lib_ring_buffer_reserve_slow - Atomic slot reservation in a buffer.
1443 * @ctx: ring buffer context.
1444 *
1445 * Return : -NOBUFS if not enough space, -ENOSPC if event size too large,
1446 * -EIO for other errors, else returns 0.
1447 * It will take care of sub-buffer switching.
1448 */
4cfec15c 1449int lib_ring_buffer_reserve_slow(struct lttng_ust_lib_ring_buffer_ctx *ctx)
852c2936
MD
1450{
1451 struct channel *chan = ctx->chan;
38fae1d3 1452 struct lttng_ust_shm_handle *handle = ctx->handle;
4cfec15c
MD
1453 const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
1454 struct lttng_ust_lib_ring_buffer *buf;
852c2936
MD
1455 struct switch_offsets offsets;
1456 int ret;
1457
1458 if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
1d498196 1459 buf = shmp(handle, chan->backend.buf[ctx->cpu].shmp);
852c2936 1460 else
1d498196 1461 buf = shmp(handle, chan->backend.buf[0].shmp);
852c2936
MD
1462 ctx->buf = buf;
1463
1464 offsets.size = 0;
1465
1466 do {
1467 ret = lib_ring_buffer_try_reserve_slow(buf, chan, &offsets,
1468 ctx);
b5a3dfa5 1469 if (caa_unlikely(ret))
852c2936 1470 return ret;
b5a3dfa5 1471 } while (caa_unlikely(v_cmpxchg(config, &buf->offset, offsets.old,
852c2936
MD
1472 offsets.end)
1473 != offsets.old));
1474
1475 /*
1476 * Atomically update last_tsc. This update races against concurrent
1477 * atomic updates, but the race will always cause supplementary full TSC
1478 * records, never the opposite (missing a full TSC record when it would
1479 * be needed).
1480 */
1481 save_last_tsc(config, buf, ctx->tsc);
1482
1483 /*
1484 * Push the reader if necessary
1485 */
1486 lib_ring_buffer_reserve_push_reader(buf, chan, offsets.end - 1);
1487
1488 /*
1489 * Clear noref flag for this subbuffer.
1490 */
1491 lib_ring_buffer_clear_noref(config, &buf->backend,
1d498196
MD
1492 subbuf_index(offsets.end - 1, chan),
1493 handle);
852c2936
MD
1494
1495 /*
1496 * Switch old subbuffer if needed.
1497 */
b5a3dfa5 1498 if (caa_unlikely(offsets.switch_old_end)) {
852c2936 1499 lib_ring_buffer_clear_noref(config, &buf->backend,
1d498196
MD
1500 subbuf_index(offsets.old - 1, chan),
1501 handle);
1502 lib_ring_buffer_switch_old_end(buf, chan, &offsets, ctx->tsc, handle);
852c2936
MD
1503 }
1504
1505 /*
1506 * Populate new subbuffer.
1507 */
b5a3dfa5 1508 if (caa_unlikely(offsets.switch_new_start))
1d498196 1509 lib_ring_buffer_switch_new_start(buf, chan, &offsets, ctx->tsc, handle);
852c2936 1510
b5a3dfa5 1511 if (caa_unlikely(offsets.switch_new_end))
1d498196 1512 lib_ring_buffer_switch_new_end(buf, chan, &offsets, ctx->tsc, handle);
852c2936
MD
1513
1514 ctx->slot_size = offsets.size;
1515 ctx->pre_offset = offsets.begin;
1516 ctx->buf_offset = offsets.begin + offsets.pre_header_padding;
1517 return 0;
1518}
d51652f7
MD
1519
1520/*
1521 * Force a read (imply TLS fixup for dlopen) of TLS variables.
1522 */
1523void lttng_fixup_ringbuffer_tls(void)
1524{
1525 asm volatile ("" : : "m" (lib_ring_buffer_nesting));
1526}
This page took 0.099658 seconds and 4 git commands to generate.