libringbuffer/ring_buffer_frontend.c

   1 /*
   2  * ring_buffer_frontend.c
   3  *
   4  * (C) Copyright 2005-2010 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
   5  *
   6  * Ring buffer wait-free buffer synchronization. Producer-consumer and flight
   7  * recorder (overwrite) modes. See thesis:
   8  *
   9  * Desnoyers, Mathieu (2009), "Low-Impact Operating System Tracing", Ph.D.
  10  * dissertation, Ecole Polytechnique de Montreal.
  11  * http://www.lttng.org/pub/thesis/desnoyers-dissertation-2009-12.pdf
  12  *
  13  * - Algorithm presentation in Chapter 5:
  14  *     "Lockless Multi-Core High-Throughput Buffering".
  15  * - Algorithm formal verification in Section 8.6:
  16  *     "Formal verification of LTTng"
  17  *
  18  * Author:
  19  *      Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
  20  *
  21  * Inspired from LTT and RelayFS:
  22  *  Karim Yaghmour <karim@opersys.com>
  23  *  Tom Zanussi <zanussi@us.ibm.com>
  24  *  Bob Wisniewski <bob@watson.ibm.com>
  25  * And from K42 :
  26  *  Bob Wisniewski <bob@watson.ibm.com>
  27  *
  28  * Buffer reader semantic :
  29  *
  30  * - get_subbuf_size
  31  * while buffer is not finalized and empty
  32  *   - get_subbuf
  33  *     - if return value != 0, continue
  34  *   - splice one subbuffer worth of data to a pipe
  35  *   - splice the data from pipe to disk/network
  36  *   - put_subbuf
  37  *
  38  * Dual LGPL v2.1/GPL v2 license.
  39  */
  40
  41 #include <sys/types.h>
  42 #include <sys/mman.h>
  43 #include <sys/stat.h>
  44 #include <fcntl.h>
  45 #include <urcu/compiler.h>
  46 #include <urcu/ref.h>
  47 #include <helper.h>
  48
  49 #include "smp.h"
  50 #include <lttng/ringbuffer-config.h>
  51 #include "vatomic.h"
  52 #include "backend.h"
  53 #include "frontend.h"
  54 #include "shm.h"
  55
  56 #ifndef max
  57 #define max(a, b)       ((a) > (b) ? (a) : (b))
  58 #endif
  59
  60 /*
  61  * Use POSIX SHM: shm_open(3) and shm_unlink(3).
  62  * close(2) to close the fd returned by shm_open.
  63  * shm_unlink releases the shared memory object name.
  64  * ftruncate(2) sets the size of the memory object.
  65  * mmap/munmap maps the shared memory obj to a virtual address in the
  66  * calling process (should be done both in libust and consumer).
  67  * See shm_overview(7) for details.
  68  * Pass file descriptor returned by shm_open(3) to ltt-sessiond through
  69  * a UNIX socket.
  70  *
  71  * Since we don't need to access the object using its name, we can
  72  * immediately shm_unlink(3) it, and only keep the handle with its file
  73  * descriptor.
  74  */
  75
  76 /*
  77  * Internal structure representing offsets to use at a sub-buffer switch.
  78  */
  79 struct switch_offsets {
  80         unsigned long begin, end, old;
  81         size_t pre_header_padding, size;
  82         unsigned int switch_new_start:1, switch_new_end:1, switch_old_start:1,
  83                      switch_old_end:1;
  84 };
  85
/*
 * Thread-local counter: being declared with __thread, each thread gets its
 * own instance.
 * NOTE(review): judging by the name this presumably tracks the ring buffer
 * nesting depth of the current thread; its users are not visible in this
 * part of the file -- confirm.
 */
__thread unsigned int lib_ring_buffer_nesting;

/*
 * Forward declaration only; the definition is not in this part of the file.
 *
 * TODO: this is unused. Errors are saved within the ring buffer.
 * Eventually, allow consumerd to print these errors.
 */
static
void lib_ring_buffer_print_errors(struct channel *chan,
                                  struct lttng_ust_lib_ring_buffer *buf, int cpu,
                                  struct lttng_ust_shm_handle *handle);
  96
  97 /**
  98  * lib_ring_buffer_reset - Reset ring buffer to initial values.
  99  * @buf: Ring buffer.
 100  *
 101  * Effectively empty the ring buffer. Should be called when the buffer is not
 102  * used for writing. The ring buffer can be opened for reading, but the reader
 103  * should not be using the iterator concurrently with reset. The previous
 104  * current iterator record is reset.
 105  */
 106 void lib_ring_buffer_reset(struct lttng_ust_lib_ring_buffer *buf,
 107                            struct lttng_ust_shm_handle *handle)
 108 {
 109         struct channel *chan = shmp(handle, buf->backend.chan);
 110         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
 111         unsigned int i;
 112
 113         /*
 114          * Reset iterator first. It will put the subbuffer if it currently holds
 115          * it.
 116          */
 117         v_set(config, &buf->offset, 0);
 118         for (i = 0; i < chan->backend.num_subbuf; i++) {
 119                 v_set(config, &shmp_index(handle, buf->commit_hot, i)->cc, 0);
 120                 v_set(config, &shmp_index(handle, buf->commit_hot, i)->seq, 0);
 121                 v_set(config, &shmp_index(handle, buf->commit_cold, i)->cc_sb, 0);
 122         }
 123         uatomic_set(&buf->consumed, 0);
 124         uatomic_set(&buf->record_disabled, 0);
 125         v_set(config, &buf->last_tsc, 0);
 126         lib_ring_buffer_backend_reset(&buf->backend, handle);
 127         /* Don't reset number of active readers */
 128         v_set(config, &buf->records_lost_full, 0);
 129         v_set(config, &buf->records_lost_wrap, 0);
 130         v_set(config, &buf->records_lost_big, 0);
 131         v_set(config, &buf->records_count, 0);
 132         v_set(config, &buf->records_overrun, 0);
 133         buf->finalized = 0;
 134 }
 135
 136 /**
 137  * channel_reset - Reset channel to initial values.
 138  * @chan: Channel.
 139  *
 140  * Effectively empty the channel. Should be called when the channel is not used
 141  * for writing. The channel can be opened for reading, but the reader should not
 142  * be using the iterator concurrently with reset. The previous current iterator
 143  * record is reset.
 144  */
 145 void channel_reset(struct channel *chan)
 146 {
 147         /*
 148          * Reset iterators first. Will put the subbuffer if held for reading.
 149          */
 150         uatomic_set(&chan->record_disabled, 0);
 151         /* Don't reset commit_count_mask, still valid */
 152         channel_backend_reset(&chan->backend);
 153         /* Don't reset switch/read timer interval */
 154         /* Don't reset notifiers and notifier enable bits */
 155         /* Don't reset reader reference count */
 156 }
 157
 158 /*
 159  * Must be called under cpu hotplug protection.
 160  */
 161 int lib_ring_buffer_create(struct lttng_ust_lib_ring_buffer *buf,
 162                            struct channel_backend *chanb, int cpu,
 163                            struct lttng_ust_shm_handle *handle,
 164                            struct shm_object *shmobj)
 165 {
 166         const struct lttng_ust_lib_ring_buffer_config *config = &chanb->config;
 167         struct channel *chan = caa_container_of(chanb, struct channel, backend);
 168         void *priv = channel_get_private(chan);
 169         size_t subbuf_header_size;
 170         uint64_t tsc;
 171         int ret;
 172
 173         /* Test for cpu hotplug */
 174         if (buf->backend.allocated)
 175                 return 0;
 176
 177         ret = lib_ring_buffer_backend_create(&buf->backend, &chan->backend,
 178                         cpu, handle, shmobj);
 179         if (ret)
 180                 return ret;
 181
 182         align_shm(shmobj, __alignof__(struct commit_counters_hot));
 183         set_shmp(buf->commit_hot,
 184                  zalloc_shm(shmobj,
 185                         sizeof(struct commit_counters_hot) * chan->backend.num_subbuf));
 186         if (!shmp(handle, buf->commit_hot)) {
 187                 ret = -ENOMEM;
 188                 goto free_chanbuf;
 189         }
 190
 191         align_shm(shmobj, __alignof__(struct commit_counters_cold));
 192         set_shmp(buf->commit_cold,
 193                  zalloc_shm(shmobj,
 194                         sizeof(struct commit_counters_cold) * chan->backend.num_subbuf));
 195         if (!shmp(handle, buf->commit_cold)) {
 196                 ret = -ENOMEM;
 197                 goto free_commit;
 198         }
 199
 200         /*
 201          * Write the subbuffer header for first subbuffer so we know the total
 202          * duration of data gathering.
 203          */
 204         subbuf_header_size = config->cb.subbuffer_header_size();
 205         v_set(config, &buf->offset, subbuf_header_size);
 206         subbuffer_id_clear_noref(config, &shmp_index(handle, buf->backend.buf_wsb, 0)->id);
 207         tsc = config->cb.ring_buffer_clock_read(shmp(handle, buf->backend.chan));
 208         config->cb.buffer_begin(buf, tsc, 0, handle);
 209         v_add(config, subbuf_header_size, &shmp_index(handle, buf->commit_hot, 0)->cc);
 210
 211         if (config->cb.buffer_create) {
 212                 ret = config->cb.buffer_create(buf, priv, cpu, chanb->name, handle);
 213                 if (ret)
 214                         goto free_init;
 215         }
 216         buf->backend.allocated = 1;
 217         return 0;
 218
 219         /* Error handling */
 220 free_init:
 221         /* commit_cold will be freed by shm teardown */
 222 free_commit:
 223         /* commit_hot will be freed by shm teardown */
 224 free_chanbuf:
 225         return ret;
 226 }
 227
#if 0
/*
 * Periodic switch timer callback. Currently compiled out by the enclosing
 * "#if 0".
 * @data: the ring buffer, passed as an unsigned long and cast back.
 *
 * Calls lib_ring_buffer_switch_slow() with SWITCH_ACTIVE when the buffer has
 * an active reader (regular or shadow). Re-arming the timer is still a TODO.
 *
 * NOTE(review): "handle" is used below but is neither a parameter nor a
 * local of this function; that has to be sorted out before re-enabling.
 */
static void switch_buffer_timer(unsigned long data)
{
        struct lttng_ust_lib_ring_buffer *buf = (struct lttng_ust_lib_ring_buffer *)data;
        struct channel *chan = shmp(handle, buf->backend.chan);
        const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;

        /*
         * Only flush buffers periodically if readers are active.
         */
        if (uatomic_read(&buf->active_readers) || uatomic_read(&buf->active_shadow_readers))
                lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE, handle);

        //TODO timers
        //if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
        //      mod_timer_pinned(&buf->switch_timer,
        //                       jiffies + chan->switch_timer_interval);
        //else
        //      mod_timer(&buf->switch_timer,
        //                jiffies + chan->switch_timer_interval);
}
#endif //0
 250
 251 static void lib_ring_buffer_start_switch_timer(struct lttng_ust_lib_ring_buffer *buf,
 252                            struct lttng_ust_shm_handle *handle)
 253 {
 254         struct channel *chan = shmp(handle, buf->backend.chan);
 255         //const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
 256
 257         if (!chan->switch_timer_interval || buf->switch_timer_enabled)
 258                 return;
 259         //TODO
 260         //init_timer(&buf->switch_timer);
 261         //buf->switch_timer.function = switch_buffer_timer;
 262         //buf->switch_timer.expires = jiffies + chan->switch_timer_interval;
 263         //buf->switch_timer.data = (unsigned long)buf;
 264         //if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
 265         //      add_timer_on(&buf->switch_timer, buf->backend.cpu);
 266         //else
 267         //      add_timer(&buf->switch_timer);
 268         buf->switch_timer_enabled = 1;
 269 }
 270
 271 static void lib_ring_buffer_stop_switch_timer(struct lttng_ust_lib_ring_buffer *buf,
 272                            struct lttng_ust_shm_handle *handle)
 273 {
 274         struct channel *chan = shmp(handle, buf->backend.chan);
 275
 276         if (!chan->switch_timer_interval || !buf->switch_timer_enabled)
 277                 return;
 278
 279         //TODO
 280         //del_timer_sync(&buf->switch_timer);
 281         buf->switch_timer_enabled = 0;
 282 }
 283
 284 #if 0
 285 /*
 286  * Polling timer to check the channels for data.
 287  */
 288 static void read_buffer_timer(unsigned long data)
 289 {
 290         struct lttng_ust_lib_ring_buffer *buf = (struct lttng_ust_lib_ring_buffer *)data;
 291         struct channel *chan = shmp(handle, buf->backend.chan);
 292         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
 293
 294         CHAN_WARN_ON(chan, !buf->backend.allocated);
 295
 296         if (uatomic_read(&buf->active_readers) || uatomic_read(&buf->active_shadow_readers))
 297             && lib_ring_buffer_poll_deliver(config, buf, chan)) {
 298                 //TODO
 299                 //wake_up_interruptible(&buf->read_wait);
 300                 //wake_up_interruptible(&chan->read_wait);
 301         }
 302
 303         //TODO
 304         //if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
 305         //      mod_timer_pinned(&buf->read_timer,
 306         //                       jiffies + chan->read_timer_interval);
 307         //else
 308         //      mod_timer(&buf->read_timer,
 309         //                jiffies + chan->read_timer_interval);
 310 }
 311 #endif //0
 312
 313 static void lib_ring_buffer_start_read_timer(struct lttng_ust_lib_ring_buffer *buf,
 314                            struct lttng_ust_shm_handle *handle)
 315 {
 316         struct channel *chan = shmp(handle, buf->backend.chan);
 317         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
 318
 319         if (config->wakeup != RING_BUFFER_WAKEUP_BY_TIMER
 320             || !chan->read_timer_interval
 321             || buf->read_timer_enabled)
 322                 return;
 323
 324         //TODO
 325         //init_timer(&buf->read_timer);
 326         //buf->read_timer.function = read_buffer_timer;
 327         //buf->read_timer.expires = jiffies + chan->read_timer_interval;
 328         //buf->read_timer.data = (unsigned long)buf;
 329
 330         //if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
 331         //      add_timer_on(&buf->read_timer, buf->backend.cpu);
 332         //else
 333         //      add_timer(&buf->read_timer);
 334         buf->read_timer_enabled = 1;
 335 }
 336
 337 static void lib_ring_buffer_stop_read_timer(struct lttng_ust_lib_ring_buffer *buf,
 338                            struct lttng_ust_shm_handle *handle)
 339 {
 340         struct channel *chan = shmp(handle, buf->backend.chan);
 341         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
 342
 343         if (config->wakeup != RING_BUFFER_WAKEUP_BY_TIMER
 344             || !chan->read_timer_interval
 345             || !buf->read_timer_enabled)
 346                 return;
 347
 348         //TODO
 349         //del_timer_sync(&buf->read_timer);
 350         /*
 351          * do one more check to catch data that has been written in the last
 352          * timer period.
 353          */
 354         if (lib_ring_buffer_poll_deliver(config, buf, chan, handle)) {
 355                 //TODO
 356                 //wake_up_interruptible(&buf->read_wait);
 357                 //wake_up_interruptible(&chan->read_wait);
 358         }
 359         buf->read_timer_enabled = 0;
 360 }
 361
 362 static void channel_unregister_notifiers(struct channel *chan,
 363                            struct lttng_ust_shm_handle *handle)
 364 {
 365         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
 366         int cpu;
 367
 368         if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
 369                 for_each_possible_cpu(cpu) {
 370                         struct lttng_ust_lib_ring_buffer *buf = shmp(handle, chan->backend.buf[cpu].shmp);
 371
 372                         lib_ring_buffer_stop_switch_timer(buf, handle);
 373                         lib_ring_buffer_stop_read_timer(buf, handle);
 374                 }
 375         } else {
 376                 struct lttng_ust_lib_ring_buffer *buf = shmp(handle, chan->backend.buf[0].shmp);
 377
 378                 lib_ring_buffer_stop_switch_timer(buf, handle);
 379                 lib_ring_buffer_stop_read_timer(buf, handle);
 380         }
 381         //channel_backend_unregister_notifiers(&chan->backend);
 382 }
 383
 384 static void channel_free(struct channel *chan, struct lttng_ust_shm_handle *handle,
 385                 int shadow)
 386 {
 387         if (!shadow)
 388                 channel_backend_free(&chan->backend, handle);
 389         /* chan is freed by shm teardown */
 390         shm_object_table_destroy(handle->table);
 391         free(handle);
 392 }
 393
 394 /**
 395  * channel_create - Create channel.
 396  * @config: ring buffer instance configuration
 397  * @name: name of the channel
 398  * @priv_data: ring buffer client private data area pointer (output)
 399  * @priv_data_size: length, in bytes, of the private data area.
 400  * @priv_data_init: initialization data for private data.
 401  * @buf_addr: pointer the the beginning of the preallocated buffer contiguous
 402  *            address mapping. It is used only by RING_BUFFER_STATIC
 403  *            configuration. It can be set to NULL for other backends.
 404  * @subbuf_size: subbuffer size
 405  * @num_subbuf: number of subbuffers
 406  * @switch_timer_interval: Time interval (in us) to fill sub-buffers with
 407  *                         padding to let readers get those sub-buffers.
 408  *                         Used for live streaming.
 409  * @read_timer_interval: Time interval (in us) to wake up pending readers.
 410  *
 411  * Holds cpu hotplug.
 412  * Returns NULL on failure.
 413  */
 414 struct lttng_ust_shm_handle *channel_create(const struct lttng_ust_lib_ring_buffer_config *config,
 415                    const char *name,
 416                    void **priv_data,
 417                    size_t priv_data_align,
 418                    size_t priv_data_size,
 419                    void *priv_data_init,
 420                    void *buf_addr, size_t subbuf_size,
 421                    size_t num_subbuf, unsigned int switch_timer_interval,
 422                    unsigned int read_timer_interval,
 423                    int **shm_fd, int **wait_fd, uint64_t **memory_map_size)
 424 {
 425         int ret, cpu;
 426         size_t shmsize, chansize;
 427         struct channel *chan;
 428         struct lttng_ust_shm_handle *handle;
 429         struct shm_object *shmobj;
 430         struct shm_ref *ref;
 431
 432         if (lib_ring_buffer_check_config(config, switch_timer_interval,
 433                                          read_timer_interval))
 434                 return NULL;
 435
 436         handle = zmalloc(sizeof(struct lttng_ust_shm_handle));
 437         if (!handle)
 438                 return NULL;
 439
 440         /* Allocate table for channel + per-cpu buffers */
 441         handle->table = shm_object_table_create(1 + num_possible_cpus());
 442         if (!handle->table)
 443                 goto error_table_alloc;
 444
 445         /* Calculate the shm allocation layout */
 446         shmsize = sizeof(struct channel);
 447         shmsize += offset_align(shmsize, __alignof__(struct lttng_ust_lib_ring_buffer_shmp));
 448         if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
 449                 shmsize += sizeof(struct lttng_ust_lib_ring_buffer_shmp) * num_possible_cpus();
 450         else
 451                 shmsize += sizeof(struct lttng_ust_lib_ring_buffer_shmp);
 452         chansize = shmsize;
 453         shmsize += offset_align(shmsize, priv_data_align);
 454         shmsize += priv_data_size;
 455
 456         shmobj = shm_object_table_append(handle->table, shmsize);
 457         if (!shmobj)
 458                 goto error_append;
 459         /* struct channel is at object 0, offset 0 (hardcoded) */
 460         set_shmp(handle->chan, zalloc_shm(shmobj, chansize));
 461         assert(handle->chan._ref.index == 0);
 462         assert(handle->chan._ref.offset == 0);
 463         chan = shmp(handle, handle->chan);
 464         if (!chan)
 465                 goto error_append;
 466
 467         /* space for private data */
 468         if (priv_data_size) {
 469                 DECLARE_SHMP(void, priv_data_alloc);
 470
 471                 align_shm(shmobj, priv_data_align);
 472                 chan->priv_data_offset = shmobj->allocated_len;
 473                 set_shmp(priv_data_alloc, zalloc_shm(shmobj, priv_data_size));
 474                 if (!shmp(handle, priv_data_alloc))
 475                         goto error_append;
 476                 *priv_data = channel_get_private(chan);
 477                 memcpy(*priv_data, priv_data_init, priv_data_size);
 478         } else {
 479                 chan->priv_data_offset = -1;
 480                 *priv_data = NULL;
 481         }
 482
 483         ret = channel_backend_init(&chan->backend, name, config,
 484                                    subbuf_size, num_subbuf, handle);
 485         if (ret)
 486                 goto error_backend_init;
 487
 488         chan->commit_count_mask = (~0UL >> chan->backend.num_subbuf_order);
 489         //TODO
 490         //chan->switch_timer_interval = usecs_to_jiffies(switch_timer_interval);
 491         //chan->read_timer_interval = usecs_to_jiffies(read_timer_interval);
 492         //TODO
 493         //init_waitqueue_head(&chan->read_wait);
 494         //init_waitqueue_head(&chan->hp_wait);
 495
 496         if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
 497                 /*
 498                  * In case of non-hotplug cpu, if the ring-buffer is allocated
 499                  * in early initcall, it will not be notified of secondary cpus.
 500                  * In that off case, we need to allocate for all possible cpus.
 501                  */
 502                 for_each_possible_cpu(cpu) {
 503                         struct lttng_ust_lib_ring_buffer *buf = shmp(handle, chan->backend.buf[cpu].shmp);
 504                         lib_ring_buffer_start_switch_timer(buf, handle);
 505                         lib_ring_buffer_start_read_timer(buf, handle);
 506                 }
 507         } else {
 508                 struct lttng_ust_lib_ring_buffer *buf = shmp(handle, chan->backend.buf[0].shmp);
 509
 510                 lib_ring_buffer_start_switch_timer(buf, handle);
 511                 lib_ring_buffer_start_read_timer(buf, handle);
 512         }
 513         ref = &handle->chan._ref;
 514         shm_get_object_data(handle, ref, shm_fd, wait_fd, memory_map_size);
 515         return handle;
 516
 517 error_backend_init:
 518 error_append:
 519         shm_object_table_destroy(handle->table);
 520 error_table_alloc:
 521         free(handle);
 522         return NULL;
 523 }
 524
 525 struct lttng_ust_shm_handle *channel_handle_create(int shm_fd, int wait_fd,
 526                                         uint64_t memory_map_size)
 527 {
 528         struct lttng_ust_shm_handle *handle;
 529         struct shm_object *object;
 530
 531         handle = zmalloc(sizeof(struct lttng_ust_shm_handle));
 532         if (!handle)
 533                 return NULL;
 534
 535         /* Allocate table for channel + per-cpu buffers */
 536         handle->table = shm_object_table_create(1 + num_possible_cpus());
 537         if (!handle->table)
 538                 goto error_table_alloc;
 539         /* Add channel object */
 540         object = shm_object_table_append_shadow(handle->table,
 541                         shm_fd, wait_fd, memory_map_size);
 542         if (!object)
 543                 goto error_table_object;
 544         /* struct channel is at object 0, offset 0 (hardcoded) */
 545         handle->chan._ref.index = 0;
 546         handle->chan._ref.offset = 0;
 547         return handle;
 548
 549 error_table_object:
 550         shm_object_table_destroy(handle->table);
 551 error_table_alloc:
 552         free(handle);
 553         return NULL;
 554 }
 555
 556 int channel_handle_add_stream(struct lttng_ust_shm_handle *handle,
 557                 int shm_fd, int wait_fd, uint64_t memory_map_size)
 558 {
 559         struct shm_object *object;
 560
 561         /* Add stream object */
 562         object = shm_object_table_append_shadow(handle->table,
 563                         shm_fd, wait_fd, memory_map_size);
 564         if (!object)
 565                 return -1;
 566         return 0;
 567 }
 568
 569 static
 570 void channel_release(struct channel *chan, struct lttng_ust_shm_handle *handle,
 571                 int shadow)
 572 {
 573         channel_free(chan, handle, shadow);
 574 }
 575
 576 /**
 577  * channel_destroy - Finalize, wait for q.s. and destroy channel.
 578  * @chan: channel to destroy
 579  *
 580  * Holds cpu hotplug.
 581  * Call "destroy" callback, finalize channels, decrement the channel
 582  * reference count. Note that when readers have completed data
 583  * consumption of finalized channels, get_subbuf() will return -ENODATA.
 584  * They should release their handle at that point.
 585  */
 586 void channel_destroy(struct channel *chan, struct lttng_ust_shm_handle *handle,
 587                 int shadow)
 588 {
 589         if (shadow) {
 590                 channel_release(chan, handle, shadow);
 591                 return;
 592         }
 593
 594         channel_unregister_notifiers(chan, handle);
 595
 596         /*
 597          * Note: the consumer takes care of finalizing and switching the
 598          * buffers.
 599          */
 600
 601         /*
 602          * sessiond/consumer are keeping a reference on the shm file
 603          * descriptor directly. No need to refcount.
 604          */
 605         channel_release(chan, handle, shadow);
 606         return;
 607 }
 608
 609 struct lttng_ust_lib_ring_buffer *channel_get_ring_buffer(
 610                                         const struct lttng_ust_lib_ring_buffer_config *config,
 611                                         struct channel *chan, int cpu,
 612                                         struct lttng_ust_shm_handle *handle,
 613                                         int **shm_fd, int **wait_fd,
 614                                         uint64_t **memory_map_size)
 615 {
 616         struct shm_ref *ref;
 617
 618         if (config->alloc == RING_BUFFER_ALLOC_GLOBAL) {
 619                 ref = &chan->backend.buf[0].shmp._ref;
 620                 shm_get_object_data(handle, ref, shm_fd, wait_fd,
 621                         memory_map_size);
 622                 return shmp(handle, chan->backend.buf[0].shmp);
 623         } else {
 624                 if (cpu >= num_possible_cpus())
 625                         return NULL;
 626                 ref = &chan->backend.buf[cpu].shmp._ref;
 627                 shm_get_object_data(handle, ref, shm_fd, wait_fd,
 628                         memory_map_size);
 629                 return shmp(handle, chan->backend.buf[cpu].shmp);
 630         }
 631 }
 632
 633 int lib_ring_buffer_open_read(struct lttng_ust_lib_ring_buffer *buf,
 634                               struct lttng_ust_shm_handle *handle,
 635                               int shadow)
 636 {
 637         if (shadow) {
 638                 if (uatomic_cmpxchg(&buf->active_shadow_readers, 0, 1) != 0)
 639                         return -EBUSY;
 640                 cmm_smp_mb();
 641                 return 0;
 642         }
 643         if (uatomic_cmpxchg(&buf->active_readers, 0, 1) != 0)
 644                 return -EBUSY;
 645         cmm_smp_mb();
 646         return 0;
 647 }
 648
 649 void lib_ring_buffer_release_read(struct lttng_ust_lib_ring_buffer *buf,
 650                                   struct lttng_ust_shm_handle *handle,
 651                                   int shadow)
 652 {
 653         struct channel *chan = shmp(handle, buf->backend.chan);
 654
 655         if (shadow) {
 656                 CHAN_WARN_ON(chan, uatomic_read(&buf->active_shadow_readers) != 1);
 657                 cmm_smp_mb();
 658                 uatomic_dec(&buf->active_shadow_readers);
 659                 return;
 660         }
 661         CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1);
 662         cmm_smp_mb();
 663         uatomic_dec(&buf->active_readers);
 664 }
 665
 666 /**
 667  * lib_ring_buffer_snapshot - save subbuffer position snapshot (for read)
 668  * @buf: ring buffer
 669  * @consumed: consumed count indicating the position where to read
 670  * @produced: produced count, indicates position when to stop reading
 671  *
 672  * Returns -ENODATA if buffer is finalized, -EAGAIN if there is currently no
 673  * data to read at consumed position, or 0 if the get operation succeeds.
 674  */
 675
 676 int lib_ring_buffer_snapshot(struct lttng_ust_lib_ring_buffer *buf,
 677                              unsigned long *consumed, unsigned long *produced,
 678                              struct lttng_ust_shm_handle *handle)
 679 {
 680         struct channel *chan = shmp(handle, buf->backend.chan);
 681         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
 682         unsigned long consumed_cur, write_offset;
 683         int finalized;
 684
 685         finalized = CMM_ACCESS_ONCE(buf->finalized);
 686         /*
 687          * Read finalized before counters.
 688          */
 689         cmm_smp_rmb();
 690         consumed_cur = uatomic_read(&buf->consumed);
 691         /*
 692          * No need to issue a memory barrier between consumed count read and
 693          * write offset read, because consumed count can only change
 694          * concurrently in overwrite mode, and we keep a sequence counter
 695          * identifier derived from the write offset to check we are getting
 696          * the same sub-buffer we are expecting (the sub-buffers are atomically
 697          * "tagged" upon writes, tags are checked upon read).
 698          */
 699         write_offset = v_read(config, &buf->offset);
 700
 701         /*
 702          * Check that we are not about to read the same subbuffer in
 703          * which the writer head is.
 704          */
 705         if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed_cur, chan)
 706             == 0)
 707                 goto nodata;
 708
 709         *consumed = consumed_cur;
 710         *produced = subbuf_trunc(write_offset, chan);
 711
 712         return 0;
 713
 714 nodata:
 715         /*
 716          * The memory barriers __wait_event()/wake_up_interruptible() take care
 717          * of "raw_spin_is_locked" memory ordering.
 718          */
 719         if (finalized)
 720                 return -ENODATA;
 721         else
 722                 return -EAGAIN;
 723 }
 724
 725 /**
 726  * lib_ring_buffer_put_snapshot - move consumed counter forward
 727  * @buf: ring buffer
 728  * @consumed_new: new consumed count value
 729  */
 730 void lib_ring_buffer_move_consumer(struct lttng_ust_lib_ring_buffer *buf,
 731                                    unsigned long consumed_new,
 732                                    struct lttng_ust_shm_handle *handle)
 733 {
 734         struct lttng_ust_lib_ring_buffer_backend *bufb = &buf->backend;
 735         struct channel *chan = shmp(handle, bufb->chan);
 736         unsigned long consumed;
 737
 738         CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1
 739                         && uatomic_read(&buf->active_shadow_readers) != 1);
 740
 741         /*
 742          * Only push the consumed value forward.
 743          * If the consumed cmpxchg fails, this is because we have been pushed by
 744          * the writer in flight recorder mode.
 745          */
 746         consumed = uatomic_read(&buf->consumed);
 747         while ((long) consumed - (long) consumed_new < 0)
 748                 consumed = uatomic_cmpxchg(&buf->consumed, consumed,
 749                                            consumed_new);
 750 }
 751
 752 /**
 753  * lib_ring_buffer_get_subbuf - get exclusive access to subbuffer for reading
 754  * @buf: ring buffer
 755  * @consumed: consumed count indicating the position where to read
 756  * @handle: shared memory handle used to resolve the channel and commit counters
 757  *
 758  * Returns 0 if the get operation succeeds. When there is currently no data to
 759  * read at the @consumed position, returns -ENODATA if the buffer is finalized
 760  * and -EAGAIN otherwise. The commit count and write offset checks apply to
 761  * the sub-buffer designated by @consumed, not to the current consumed position.
 762  */
 763 int lib_ring_buffer_get_subbuf(struct lttng_ust_lib_ring_buffer *buf,
 764                                unsigned long consumed,
 765                                struct lttng_ust_shm_handle *handle)
 766 {
 767         struct channel *chan = shmp(handle, buf->backend.chan);
 768         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
 769         unsigned long consumed_cur, consumed_idx, commit_count, write_offset;
 770         int ret;
 771         int finalized;
 772
 773 retry:
 774         finalized = CMM_ACCESS_ONCE(buf->finalized);
 775         /*
 776          * Read finalized before counters.
 777          */
 778         cmm_smp_rmb();
 779         consumed_cur = uatomic_read(&buf->consumed);
 780         consumed_idx = subbuf_index(consumed, chan);
 781         commit_count = v_read(config, &shmp_index(handle, buf->commit_cold, consumed_idx)->cc_sb);
 782         /*
 783          * Local rmb to match the remote wmb: make sure we read the commit
 784          * count before reading the buffer data and the write offset. Correct
 785          * consumed offset ordering wrt commit count is insured by the use of
 786          * cmpxchg to update the consumed offset.
 787          */
 788         cmm_smp_rmb();
 789
 790         write_offset = v_read(config, &buf->offset);
 791
 792         /*
 793          * Check that the buffer we are getting is after or at consumed_cur
 794          * position.
 795          */
 796         if ((long) subbuf_trunc(consumed, chan)
 797             - (long) subbuf_trunc(consumed_cur, chan) < 0)
 798                 goto nodata;
 799
 800         /*
 801          * Check that the subbuffer we are trying to consume has been
 802          * already fully committed. Compare against consumed, the position
 803          * consumed_idx and commit_count were derived from.
 804          */
 805         if (((commit_count - chan->backend.subbuf_size)
 806              & chan->commit_count_mask)
 807             - (buf_trunc(consumed, chan)
 808                >> chan->backend.num_subbuf_order)
 809             != 0)
 810                 goto nodata;
 811
 812         /*
 813          * Check that we are not about to read the same subbuffer in
 814          * which the writer head is.
 815          */
 816         if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed, chan)
 817             == 0)
 818                 goto nodata;
 819
 820         /*
 821          * Failure to get the subbuffer causes a busy-loop retry without going
 822          * to a wait queue. These are caused by short-lived race windows where
 823          * the writer is getting access to a subbuffer we were trying to get
 824          * access to. Also checks that the "consumed" buffer count we are
 825          * looking for matches the one contained in the subbuffer id.
 826          */
 827         ret = update_read_sb_index(config, &buf->backend, &chan->backend,
 828                                    consumed_idx, buf_trunc_val(consumed, chan),
 829                                    handle);
 830         if (ret)
 831                 goto retry;
 832         subbuffer_id_clear_noref(config, &buf->backend.buf_rsb.id);
 833
 834         buf->get_subbuf_consumed = consumed;
 835         buf->get_subbuf = 1;
 836
 837         return 0;
 838
 839 nodata:
 840         /*
 841          * The memory barriers __wait_event()/wake_up_interruptible() take care
 842          * of "raw_spin_is_locked" memory ordering.
 843          */
 844         if (finalized)
 845                 return -ENODATA;
 846         else
 847                 return -EAGAIN;
 848 }
 849
 850 /**
 851  * lib_ring_buffer_put_subbuf - release exclusive subbuffer access
 852  * @buf: ring buffer
      * @handle: shared memory handle used to resolve the channel and the
      *          backend sub-buffer array
      *
      * Warns and returns without doing anything else when no sub-buffer is
      * currently held (buf->get_subbuf is not set). Otherwise adds the records
      * left unread in the reader sub-buffer to records_read, resets that
      * counter, flags the reader sub-buffer as noref and attempts to exchange
      * it back into the writer sub-buffer table.
 853  */
 854 void lib_ring_buffer_put_subbuf(struct lttng_ust_lib_ring_buffer *buf,
 855                                 struct lttng_ust_shm_handle *handle)
 856 {
 857         struct lttng_ust_lib_ring_buffer_backend *bufb = &buf->backend;
 858         struct channel *chan = shmp(handle, bufb->chan);
 859         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
 860         unsigned long read_sb_bindex, consumed_idx, consumed;
 861
             /* Warn when neither the reader nor the shadow reader count is 1. */
 862         CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1
 863                         && uatomic_read(&buf->active_shadow_readers) != 1);
 864
 865         if (!buf->get_subbuf) {
 866                 /*
 867                  * Reader puts a subbuffer it did not get.
 868                  */
 869                 CHAN_WARN_ON(chan, 1);
 870                 return;
 871         }
             /* Position recorded by the matching get; the "held" flag is dropped. */
 872         consumed = buf->get_subbuf_consumed;
 873         buf->get_subbuf = 0;
 874
 875         /*
 876          * Clear the records_unread counter. (overruns counter)
              * Its value is first added to the backend records_read counter.
 877          * Can still be non-zero if a file reader simply grabbed the data
 878          * without using iterators.
 879          * Can be below zero if an iterator is used on a snapshot more than
 880          * once.
 881          */
 882         read_sb_bindex = subbuffer_id_get_index(config, bufb->buf_rsb.id);
 883         v_add(config, v_read(config,
 884                              &shmp(handle, shmp_index(handle, bufb->array, read_sb_bindex)->shmp)->records_unread),
 885               &bufb->records_read);
 886         v_set(config, &shmp(handle, shmp_index(handle, bufb->array, read_sb_bindex)->shmp)->records_unread, 0);
             /* In overwrite mode the reader sub-buffer must not already be noref. */
 887         CHAN_WARN_ON(chan, config->mode == RING_BUFFER_OVERWRITE
 888                      && subbuffer_id_is_noref(config, bufb->buf_rsb.id));
 889         subbuffer_id_set_noref(config, &bufb->buf_rsb.id);
 890
 891         /*
 892          * Exchange the reader subbuffer with the one we put in its place in the
 893          * writer subbuffer table. Expect the original consumed count. If
 894          * update_read_sb_index fails, this is because the writer updated the
 895          * subbuffer concurrently. We should therefore keep the subbuffer we
 896          * currently have: it has become invalid to try reading this sub-buffer
 897          * consumed count value anyway.
 898          */
 899         consumed_idx = subbuf_index(consumed, chan);
 900         update_read_sb_index(config, &buf->backend, &chan->backend,
 901                              consumed_idx, buf_trunc_val(consumed, chan),
 902                              handle);
 903         /*
 904          * update_read_sb_index return value ignored. Don't exchange sub-buffer
 905          * if the writer concurrently updated it.
 906          */
 907 }
 908
 909 /*
 910  * Print the commit counters of the sub-buffer that contains cons_offset.
 911  * cons_offset iterates over all sub-buffer offsets between the reader
 912  * position and the writer position (inclusive).
 913  */
 914 static
 915 void lib_ring_buffer_print_subbuffer_errors(struct lttng_ust_lib_ring_buffer *buf,
 916                                             struct channel *chan,
 917                                             unsigned long cons_offset,
 918                                             int cpu,
 919                                             struct lttng_ust_shm_handle *handle)
 920 {
 921         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
 922         unsigned long sb_idx = subbuf_index(cons_offset, chan);
 923         unsigned long hot_cc =
 924                 v_read(config, &shmp_index(handle, buf->commit_hot, sb_idx)->cc);
 925         unsigned long cold_cc =
 926                 v_read(config, &shmp_index(handle, buf->commit_cold, sb_idx)->cc_sb);
 927
 928         if (subbuf_offset(hot_cc, chan) != 0) {
 929                 DBG("ring buffer %s, cpu %d: "
 930                        "commit count in subbuffer %lu,\n"
 931                        "expecting multiples of %lu bytes\n"
 932                        "  [ %lu bytes committed, %lu bytes reader-visible ]\n",
 933                        chan->backend.name, cpu, sb_idx,
 934                        chan->backend.subbuf_size, hot_cc, cold_cc);
 935         }
 936         DBG("ring buffer: %s, cpu %d: %lu bytes committed\n",
 937                chan->backend.name, cpu, hot_cc);
 938 }
 939
 940 /* Report non-consumed data in buf, then each remaining sub-buffer's counters. */
 941 static
 942 void lib_ring_buffer_print_buffer_errors(struct lttng_ust_lib_ring_buffer *buf,
 943                                          struct channel *chan,
 944                                          void *priv, int cpu,
 945                                          struct lttng_ust_shm_handle *handle)
 946 {
 947         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
 948         unsigned long written, pos;
 949
 950         /*
 951          * Runs at teardown, once no writer or reader reference remains:
 952          * the commit_count, write offset and consumed reads need no ordering.
 953          */
 954         written = v_read(config, &buf->offset);
 955         pos = uatomic_read(&buf->consumed);
 956         if (written != pos) {
 957                 DBG("ring buffer %s, cpu %d: "
 958                        "non-consumed data\n"
 959                        "  [ %lu bytes written, %lu bytes read ]\n",
 960                        chan->backend.name, cpu, written, pos);
 961         }
 962
 963         pos = uatomic_read(&buf->consumed);
 964         while ((long) (subbuf_trunc((unsigned long) v_read(config, &buf->offset),
 965                                     chan) - pos) > 0) {
 966                 lib_ring_buffer_print_subbuffer_errors(buf, chan, pos, cpu, handle);
 967                 pos = subbuf_align(pos, chan);
 968         }
 969 }
 970
 971 static
 972 void lib_ring_buffer_print_errors(struct channel *chan,
 973                                   struct lttng_ust_lib_ring_buffer *buf, int cpu,
 974                                   struct lttng_ust_shm_handle *handle)
 975 {
 976         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
 977         void *priv = channel_get_private(chan);
 978
 979         DBG("ring buffer %s, cpu %d: %lu records written, "
 980                           "%lu records overrun\n",
 981                           chan->backend.name, cpu,
 982                           v_read(config, &buf->records_count),
 983                           v_read(config, &buf->records_overrun));
 984
 985         if (v_read(config, &buf->records_lost_full)
 986             || v_read(config, &buf->records_lost_wrap)
 987             || v_read(config, &buf->records_lost_big))
 988                 DBG("ring buffer %s, cpu %d: records were lost. Caused by:\n"
 989                        "  [ %lu buffer full, %lu nest buffer wrap-around, "
 990                        "%lu event too big ]\n",
 991                        chan->backend.name, cpu,
 992                        v_read(config, &buf->records_lost_full),
 993                        v_read(config, &buf->records_lost_wrap),
 994                        v_read(config, &buf->records_lost_big));
 995
 996         lib_ring_buffer_print_buffer_errors(buf, chan, priv, cpu, handle);
 997 }
 998
 999 /*
1000  * lib_ring_buffer_switch_old_start: Populate old subbuffer header.
1001  *
1002  * Only executed when the buffer is finalized, in SWITCH_FLUSH.
      *
      * Invokes the client buffer_begin callback on the sub-buffer containing
      * offsets->old, then adds the sub-buffer header size to that sub-buffer's
      * hot commit counter and checks whether the sub-buffer must be delivered.
1003  */
1004 static
1005 void lib_ring_buffer_switch_old_start(struct lttng_ust_lib_ring_buffer *buf,
1006                                       struct channel *chan,
1007                                       struct switch_offsets *offsets,
1008                                       uint64_t tsc,
1009                                       struct lttng_ust_shm_handle *handle)
1010 {
1011         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
1012         unsigned long oldidx = subbuf_index(offsets->old, chan);
1013         unsigned long commit_count;
1014
             /* Let the client write the header of sub-buffer oldidx, stamped with tsc. */
1015         config->cb.buffer_begin(buf, tsc, oldidx, handle);
1016
1017         /*
1018          * Order all writes to buffer before the commit count update that will
1019          * determine that the subbuffer is full.
1020          */
1021         cmm_smp_wmb();
1022         v_add(config, config->cb.subbuffer_header_size(),
1023               &shmp_index(handle, buf->commit_hot, oldidx)->cc);
1024         commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, oldidx)->cc);
1025         /* Check if the written buffer has to be delivered */
1026         lib_ring_buffer_check_deliver(config, buf, chan, offsets->old,
1027                                       commit_count, oldidx, handle);
1028         lib_ring_buffer_write_commit_counter(config, buf, chan, oldidx,
1029                                              offsets->old, commit_count,
1030                                              config->cb.subbuffer_header_size(),
1031                                              handle);
1032 }
1033
1034 /*
1035  * lib_ring_buffer_switch_old_end: switch old subbuffer
1036  *
1037  * Note : offset_old should never be 0 here. It is ok, because we never perform
1038  * buffer switch on an empty subbuffer in SWITCH_ACTIVE mode. The caller
1039  * increments the offset_old value when doing a SWITCH_FLUSH on an empty
1040  * subbuffer.
      *
      * Records the data size of the sub-buffer that contains offsets->old - 1,
      * then adds the remaining padding to its hot commit counter and checks
      * whether the sub-buffer must be delivered. The tsc parameter is not used
      * by this function.
1041  */
1042 static
1043 void lib_ring_buffer_switch_old_end(struct lttng_ust_lib_ring_buffer *buf,
1044                                     struct channel *chan,
1045                                     struct switch_offsets *offsets,
1046                                     uint64_t tsc,
1047                                     struct lttng_ust_shm_handle *handle)
1048 {
1049         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
1050         unsigned long oldidx = subbuf_index(offsets->old - 1, chan);
1051         unsigned long commit_count, padding_size, data_size;
1052
             /*
              * data_size is the in-sub-buffer offset of the last used byte
              * (offsets->old - 1) plus one; padding_size is whatever is left of
              * subbuf_size.
              */
1053         data_size = subbuf_offset(offsets->old - 1, chan) + 1;
1054         padding_size = chan->backend.subbuf_size - data_size;
1055         subbuffer_set_data_size(config, &buf->backend, oldidx, data_size,
1056                                 handle);
1057
1058         /*
1059          * Order all writes to buffer before the commit count update that will
1060          * determine that the subbuffer is full.
1061          */
1062         cmm_smp_wmb();
1063         v_add(config, padding_size, &shmp_index(handle, buf->commit_hot, oldidx)->cc);
1064         commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, oldidx)->cc);
1065         lib_ring_buffer_check_deliver(config, buf, chan, offsets->old - 1,
1066                                       commit_count, oldidx, handle);
1067         lib_ring_buffer_write_commit_counter(config, buf, chan, oldidx,
1068                                              offsets->old, commit_count,
1069                                              padding_size, handle);
1070 }
1071
1072 /*
1073  * lib_ring_buffer_switch_new_start: Populate new subbuffer.
1074  *
1075  * This code can be executed unordered : writers may already have written to the
1076  * sub-buffer before this code gets executed, caution.  The commit makes sure
1077  * that this code is executed before the deliver of this sub-buffer.
      *
      * Invokes the client buffer_begin callback on the sub-buffer containing
      * offsets->begin, then adds the sub-buffer header size to that sub-buffer's
      * hot commit counter and checks whether the sub-buffer must be delivered.
1078  */
1079 static
1080 void lib_ring_buffer_switch_new_start(struct lttng_ust_lib_ring_buffer *buf,
1081                                       struct channel *chan,
1082                                       struct switch_offsets *offsets,
1083                                       uint64_t tsc,
1084                                       struct lttng_ust_shm_handle *handle)
1085 {
1086         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
1087         unsigned long beginidx = subbuf_index(offsets->begin, chan);
1088         unsigned long commit_count;
1089
             /* Let the client write the header of sub-buffer beginidx, stamped with tsc. */
1090         config->cb.buffer_begin(buf, tsc, beginidx, handle);
1091
1092         /*
1093          * Order all writes to buffer before the commit count update that will
1094          * determine that the subbuffer is full.
1095          */
1096         cmm_smp_wmb();
1097         v_add(config, config->cb.subbuffer_header_size(),
1098               &shmp_index(handle, buf->commit_hot, beginidx)->cc);
1099         commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, beginidx)->cc);
1100         /* Check if the written buffer has to be delivered */
1101         lib_ring_buffer_check_deliver(config, buf, chan, offsets->begin,
1102                                       commit_count, beginidx, handle);
1103         lib_ring_buffer_write_commit_counter(config, buf, chan, beginidx,
1104                                              offsets->begin, commit_count,
1105                                              config->cb.subbuffer_header_size(),
1106                                              handle);
1107 }
1108
1109 /*
1110  * lib_ring_buffer_switch_new_end: finish switching current subbuffer
1111  *
1112  * The only remaining threads could be the ones with pending commits. They will
1113  * have to do the deliver themselves.
      *
      * Records the data size of the sub-buffer that contains offsets->end - 1,
      * then adds the remaining padding to its hot commit counter and checks
      * whether the sub-buffer must be delivered. The tsc parameter is not used
      * by this function.
1114  */
1115 static
1116 void lib_ring_buffer_switch_new_end(struct lttng_ust_lib_ring_buffer *buf,
1117                                     struct channel *chan,
1118                                     struct switch_offsets *offsets,
1119                                     uint64_t tsc,
1120                                     struct lttng_ust_shm_handle *handle)
1121 {
1122         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
1123         unsigned long endidx = subbuf_index(offsets->end - 1, chan);
1124         unsigned long commit_count, padding_size, data_size;
1125
             /*
              * data_size is the in-sub-buffer offset of the last used byte
              * (offsets->end - 1) plus one; padding_size is whatever is left of
              * subbuf_size.
              */
1126         data_size = subbuf_offset(offsets->end - 1, chan) + 1;
1127         padding_size = chan->backend.subbuf_size - data_size;
1128         subbuffer_set_data_size(config, &buf->backend, endidx, data_size,
1129                                 handle);
1130
1131         /*
1132          * Order all writes to buffer before the commit count update that will
1133          * determine that the subbuffer is full.
1134          */
1135         cmm_smp_wmb();
1136         v_add(config, padding_size, &shmp_index(handle, buf->commit_hot, endidx)->cc);
1137         commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, endidx)->cc);
1138         lib_ring_buffer_check_deliver(config, buf, chan, offsets->end - 1,
1139                                   commit_count, endidx, handle);
1140         lib_ring_buffer_write_commit_counter(config, buf, chan, endidx,
1141                                              offsets->end, commit_count,
1142                                              padding_size, handle);
1143 }
1144
1145 /*
1146  * lib_ring_buffer_try_switch_slow: compute the offsets of a forced switch.
1147  *
1148  * Returns :
1149  * 0 if ok
1150  * !0 if execution must be aborted.
1151  */
1152 static
1153 int lib_ring_buffer_try_switch_slow(enum switch_mode mode,
1154                                     struct lttng_ust_lib_ring_buffer *buf,
1155                                     struct channel *chan,
1156                                     struct switch_offsets *offsets,
1157                                     uint64_t *tsc)
1158 {
1159         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
1160         unsigned long sb_off;
1161
1162         offsets->switch_old_start = 0;
1163         offsets->begin = v_read(config, &buf->offset);
1164         offsets->old = offsets->begin;
1165         sb_off = subbuf_offset(offsets->begin, chan);
1166
1167         *tsc = config->cb.ring_buffer_clock_read(chan);
1168
1169         /*
1170          * In SWITCH_ACTIVE mode, only switch a sub-buffer that contains events:
1171          * an empty one is left alone. SWITCH_ACTIVE only flushes the current
1172          * subbuffer, dealing with end of subbuffer header as appropriate.
1173          * The next record that reserves space will be responsible for
1174          * populating the following subbuffer header. We choose not to populate
1175          * the next subbuffer header here because we want to be able to use
1176          * SWITCH_ACTIVE for periodical buffer flush, which must
1177          * guarantee that all the buffer content (records and header
1178          * timestamps) are visible to the reader. This is required for
1179          * quiescence guarantees for the fusion merge.
1180          */
1181         if (mode != SWITCH_FLUSH && sb_off == 0)
1182                 return -1;      /* we do not have to switch : buffer is empty */
1183
1184         if (caa_unlikely(sb_off == 0)) {
1185                 /*
1186                  * Finalize (SWITCH_FLUSH) on an empty subbuffer: flush its
1187                  * header so that we end up knowing the total data gathering
1188                  * duration even if there were no records saved after the last
1189                  * buffer switch. If the client does not save any header
1190                  * information, don't switch: it is invalid to deliver a
1191                  * completely empty subbuffer.
1192                  */
1193                 if (!config->cb.subbuffer_header_size())
1194                         return -1;
1195                 /* Need to write the subbuffer start header on finalize. */
1196                 offsets->switch_old_start = 1;
1197         }
1198
1199         /* Both begin and end are set to the sub-buffer-aligned offset. */
1200         offsets->begin = subbuf_align(offsets->begin, chan);
1201         offsets->end = offsets->begin;
1202         return 0;
1203 }
1204
1205 /*
1206  * Force a sub-buffer switch. This operation is completely reentrant : can be
1207  * called while tracing is active with absolutely no lock held.
1208  *
1209  * Note, however, that as a v_cmpxchg is used for some atomic
1210  * operations, this function must be called from the CPU which owns the buffer
1211  * for an ACTIVE flush.
      *
      * Returns without switching when lib_ring_buffer_try_switch_slow() reports
      * that no switch is needed.
1212  */
1213 void lib_ring_buffer_switch_slow(struct lttng_ust_lib_ring_buffer *buf, enum switch_mode mode,
1214                                  struct lttng_ust_shm_handle *handle)
1215 {
1216         struct channel *chan = shmp(handle, buf->backend.chan);
1217         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
1218         struct switch_offsets offsets;
1219         unsigned long oldidx;
1220         uint64_t tsc;
1221
1222         offsets.size = 0;
1223
1224         /*
1225          * Perform retryable operations.
              * The offsets are recomputed until the cmpxchg on buf->offset finds the
              * value they were computed from (offsets.old) and installs offsets.end.
1226          */
1227         do {
1228                 if (lib_ring_buffer_try_switch_slow(mode, buf, chan, &offsets,
1229                                                     &tsc))
1230                         return; /* Switch not needed */
1231         } while (v_cmpxchg(config, &buf->offset, offsets.old, offsets.end)
1232                  != offsets.old);
1233
1234         /*
1235          * Atomically update last_tsc. This update races against concurrent
1236          * atomic updates, but the race will always cause supplementary full TSC
1237          * records, never the opposite (missing a full TSC record when it would
1238          * be needed).
1239          */
1240         save_last_tsc(config, buf, tsc);
1241
1242         /*
1243          * Push the reader if necessary
1244          */
1245         lib_ring_buffer_reserve_push_reader(buf, chan, offsets.old);
1246
1247         oldidx = subbuf_index(offsets.old, chan);
1248         lib_ring_buffer_clear_noref(config, &buf->backend, oldidx, handle);
1249
1250         /*
1251          * May need to populate header start on SWITCH_FLUSH.
              * offsets.old is then moved past the header that was just written.
1252          */
1253         if (offsets.switch_old_start) {
1254                 lib_ring_buffer_switch_old_start(buf, chan, &offsets, tsc, handle);
1255                 offsets.old += config->cb.subbuffer_header_size();
1256         }
1257
1258         /*
1259          * Switch old subbuffer.
1260          */
1261         lib_ring_buffer_switch_old_end(buf, chan, &offsets, tsc, handle);
1262 }
1263
1264 /*
      * lib_ring_buffer_try_reserve_slow: compute the offsets needed to reserve
      * the record described by ctx, flagging any sub-buffer switch this requires.
      * buf->offset is only read here; the visible caller installs offsets->end
      * with a cmpxchg against offsets->old. On failure the matching
      * records_lost_* counter of buf is incremented.
      *
1265  * Returns :
1266  * 0 if ok
1267  * -ENOSPC if event size is too large for packet.
1268  * -ENOBUFS if there is currently not enough space in buffer for the event.
1269  * -EIO if data cannot be written into the buffer for any other reason.
1270  */
1271 static
1272 int lib_ring_buffer_try_reserve_slow(struct lttng_ust_lib_ring_buffer *buf,
1273                                      struct channel *chan,
1274                                      struct switch_offsets *offsets,
1275                                      struct lttng_ust_lib_ring_buffer_ctx *ctx)
1276 {
1277         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
1278         struct lttng_ust_shm_handle *handle = ctx->handle;
1279         unsigned long reserve_commit_diff;
1280
1281         offsets->begin = v_read(config, &buf->offset);
1282         offsets->old = offsets->begin;
1283         offsets->switch_new_start = 0;
1284         offsets->switch_new_end = 0;
1285         offsets->switch_old_end = 0;
1286         offsets->pre_header_padding = 0;
1287
             /* A clock value equal to -EIO is treated as a clock read failure. */
1288         ctx->tsc = config->cb.ring_buffer_clock_read(chan);
1289         if ((int64_t) ctx->tsc == -EIO)
1290                 return -EIO;
1291
1292         if (last_tsc_overflow(config, buf, ctx->tsc))
1293                 ctx->rflags |= RING_BUFFER_RFLAG_FULL_TSC;
1294
             /*
              * NOTE(review): this test goes through ctx->chan while the rest of
              * the function uses chan. The visible caller passes ctx->chan as
              * chan, so both presumably designate the same channel - confirm.
              */
1295         if (caa_unlikely(subbuf_offset(offsets->begin, ctx->chan) == 0)) {
1296                 offsets->switch_new_start = 1;          /* For offsets->begin */
1297         } else {
                     /*
                      * Record size: record header, alignment padding for the
                      * payload, then the payload itself.
                      */
1298                 offsets->size = config->cb.record_header_size(config, chan,
1299                                                 offsets->begin,
1300                                                 &offsets->pre_header_padding,
1301                                                 ctx);
1302                 offsets->size +=
1303                         lib_ring_buffer_align(offsets->begin + offsets->size,
1304                                               ctx->largest_align)
1305                         + ctx->data_size;
1306                 if (caa_unlikely(subbuf_offset(offsets->begin, chan) +
1307                              offsets->size > chan->backend.subbuf_size)) {
1308                         offsets->switch_old_end = 1;    /* For offsets->old */
1309                         offsets->switch_new_start = 1;  /* For offsets->begin */
1310                 }
1311         }
1312         if (caa_unlikely(offsets->switch_new_start)) {
1313                 unsigned long sb_index;
1314
1315                 /*
1316                  * We are typically not filling the previous buffer completely.
1317                  */
1318                 if (caa_likely(offsets->switch_old_end))
1319                         offsets->begin = subbuf_align(offsets->begin, chan);
1320                 offsets->begin = offsets->begin
1321                                  + config->cb.subbuffer_header_size();
1322                 /* Test new buffer integrity */
1323                 sb_index = subbuf_index(offsets->begin, chan);
1324                 reserve_commit_diff =
1325                   (buf_trunc(offsets->begin, chan)
1326                    >> chan->backend.num_subbuf_order)
1327                   - ((unsigned long) v_read(config,
1328                                             &shmp_index(handle, buf->commit_cold, sb_index)->cc_sb)
1329                      & chan->commit_count_mask);
1330                 if (caa_likely(reserve_commit_diff == 0)) {
1331                         /* Next subbuffer not being written to. */
1332                         if (caa_unlikely(config->mode != RING_BUFFER_OVERWRITE &&
1333                                 subbuf_trunc(offsets->begin, chan)
1334                                  - subbuf_trunc((unsigned long)
1335                                      uatomic_read(&buf->consumed), chan)
1336                                 >= chan->backend.buf_size)) {
1337                                 /*
1338                                  * We do not overwrite non consumed buffers
1339                                  * and we are full : record is lost.
1340                                  */
1341                                 v_inc(config, &buf->records_lost_full);
1342                                 return -ENOBUFS;
1343                         } else {
1344                                 /*
1345                                  * Next subbuffer not being written to, and we
1346                                  * are either in overwrite mode or the buffer is
1347                                  * not full. It's safe to write in this new
1348                                  * subbuffer.
1349                                  */
1350                         }
1351                 } else {
1352                         /*
1353                          * Next subbuffer reserve offset does not match the
1354                          * commit offset. Drop record in producer-consumer and
1355                          * overwrite mode. Caused by either a writer OOPS or too
1356                          * many nested writes over a reserve/commit pair.
1357                          */
1358                         v_inc(config, &buf->records_lost_wrap);
1359                         return -EIO;
1360                 }
                     /*
                      * Recompute the record size from the new begin offset, which
                      * now sits after the sub-buffer header.
                      */
1361                 offsets->size =
1362                         config->cb.record_header_size(config, chan,
1363                                                 offsets->begin,
1364                                                 &offsets->pre_header_padding,
1365                                                 ctx);
1366                 offsets->size +=
1367                         lib_ring_buffer_align(offsets->begin + offsets->size,
1368                                               ctx->largest_align)
1369                         + ctx->data_size;
1370                 if (caa_unlikely(subbuf_offset(offsets->begin, chan)
1371                              + offsets->size > chan->backend.subbuf_size)) {
1372                         /*
1373                          * Record too big for subbuffers, report error, don't
1374                          * complete the sub-buffer switch.
1375                          */
1376                         v_inc(config, &buf->records_lost_big);
1377                         return -ENOSPC;
1378                 } else {
1379                         /*
1380                          * We just made a successful buffer switch and the
1381                          * record fits in the new subbuffer. Let's write.
1382                          */
1383                 }
1384         } else {
1385                 /*
1386                  * Record fits in the current buffer and we are not on a switch
1387                  * boundary. It's safe to write.
1388                  */
1389         }
1390         offsets->end = offsets->begin + offsets->size;
1391
1392         if (caa_unlikely(subbuf_offset(offsets->end, chan) == 0)) {
1393                 /*
1394                  * The offset_end will fall at the very beginning of the next
1395                  * subbuffer.
1396                  */
1397                 offsets->switch_new_end = 1;    /* For offsets->begin */
1398         }
1399         return 0;
1400 }
1401
1402 /**
1403  * lib_ring_buffer_reserve_slow - Atomic slot reservation in a buffer.
1404  * @ctx: ring buffer context.
1405  *
1406  * Return : -ENOBUFS if not enough space, -ENOSPC if event size too large,
1407  * -EIO for other errors, else returns 0.
1408  * It will take care of sub-buffer switching.
1409  */
1410 int lib_ring_buffer_reserve_slow(struct lttng_ust_lib_ring_buffer_ctx *ctx)
1411 {
1412         struct channel *chan = ctx->chan;
1413         struct lttng_ust_shm_handle *handle = ctx->handle;
1414         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
1415         struct lttng_ust_lib_ring_buffer *buf;
1416         struct switch_offsets offsets;
1417         int ret;
1418
1419         if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
1420                 buf = shmp(handle, chan->backend.buf[ctx->cpu].shmp);
1421         else
1422                 buf = shmp(handle, chan->backend.buf[0].shmp);
1423         ctx->buf = buf;
1424
1425         offsets.size = 0;
1426
1427         do {
1428                 ret = lib_ring_buffer_try_reserve_slow(buf, chan, &offsets,
1429                                                        ctx);
1430                 if (caa_unlikely(ret))
1431                         return ret;
1432         } while (caa_unlikely(v_cmpxchg(config, &buf->offset, offsets.old,
1433                                     offsets.end)
1434                           != offsets.old));
1435
1436         /*
1437          * Atomically update last_tsc. This update races against concurrent
1438          * atomic updates, but the race will always cause supplementary full TSC
1439          * records, never the opposite (missing a full TSC record when it would
1440          * be needed).
1441          */
1442         save_last_tsc(config, buf, ctx->tsc);
1443
1444         /*
1445          * Push the reader if necessary
1446          */
1447         lib_ring_buffer_reserve_push_reader(buf, chan, offsets.end - 1);
1448
1449         /*
1450          * Clear noref flag for this subbuffer.
1451          */
1452         lib_ring_buffer_clear_noref(config, &buf->backend,
1453                                     subbuf_index(offsets.end - 1, chan),
1454                                     handle);
1455
1456         /*
1457          * Switch old subbuffer if needed.
1458          */
1459         if (caa_unlikely(offsets.switch_old_end)) {
1460                 lib_ring_buffer_clear_noref(config, &buf->backend,
1461                                             subbuf_index(offsets.old - 1, chan),
1462                                             handle);
1463                 lib_ring_buffer_switch_old_end(buf, chan, &offsets, ctx->tsc, handle);
1464         }
1465
1466         /*
1467          * Populate new subbuffer.
1468          */
1469         if (caa_unlikely(offsets.switch_new_start))
1470                 lib_ring_buffer_switch_new_start(buf, chan, &offsets, ctx->tsc, handle);
1471
1472         if (caa_unlikely(offsets.switch_new_end))
1473                 lib_ring_buffer_switch_new_end(buf, chan, &offsets, ctx->tsc, handle);
1474
1475         ctx->slot_size = offsets.size;
1476         ctx->pre_offset = offsets.begin;
1477         ctx->buf_offset = offsets.begin + offsets.pre_header_padding;
1478         return 0;
1479 }