libringbuffer/ring_buffer_frontend.c

   1 /*
   2  * ring_buffer_frontend.c
   3  *
   4  * (C) Copyright 2005-2010 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
   5  *
   6  * Ring buffer wait-free buffer synchronization. Producer-consumer and flight
   7  * recorder (overwrite) modes. See thesis:
   8  *
   9  * Desnoyers, Mathieu (2009), "Low-Impact Operating System Tracing", Ph.D.
  10  * dissertation, Ecole Polytechnique de Montreal.
  11  * http://www.lttng.org/pub/thesis/desnoyers-dissertation-2009-12.pdf
  12  *
  13  * - Algorithm presentation in Chapter 5:
  14  *     "Lockless Multi-Core High-Throughput Buffering".
  15  * - Algorithm formal verification in Section 8.6:
  16  *     "Formal verification of LTTng"
  17  *
  18  * Author:
  19  *      Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
  20  *
  21  * Inspired from LTT and RelayFS:
  22  *  Karim Yaghmour <karim@opersys.com>
  23  *  Tom Zanussi <zanussi@us.ibm.com>
  24  *  Bob Wisniewski <bob@watson.ibm.com>
  25  * And from K42 :
  26  *  Bob Wisniewski <bob@watson.ibm.com>
  27  *
  28  * Buffer reader semantic :
  29  *
  30  * - get_subbuf_size
  31  * while buffer is not finalized and empty
  32  *   - get_subbuf
  33  *     - if return value != 0, continue
  34  *   - splice one subbuffer worth of data to a pipe
  35  *   - splice the data from pipe to disk/network
  36  *   - put_subbuf
  37  *
  38  * Dual LGPL v2.1/GPL v2 license.
  39  */
  40
  41 #include <sys/types.h>
  42 #include <sys/mman.h>
  43 #include <sys/stat.h>
  44 #include <fcntl.h>
  45 #include <urcu/compiler.h>
  46 #include <urcu/ref.h>
  47
  48 #include "smp.h"
  49 #include <lttng/ringbuffer-config.h>
  50 #include "backend.h"
  51 #include "frontend.h"
  52 #include "shm.h"
  53
  54 #ifndef max
  55 #define max(a, b)       ((a) > (b) ? (a) : (b))
  56 #endif
  57
  58 /*
  59  * Use POSIX SHM: shm_open(3) and shm_unlink(3).
  60  * close(2) to close the fd returned by shm_open.
  61  * shm_unlink releases the shared memory object name.
  62  * ftruncate(2) sets the size of the memory object.
  63  * mmap/munmap maps the shared memory obj to a virtual address in the
  64  * calling process (should be done both in libust and consumer).
  65  * See shm_overview(7) for details.
  66  * Pass file descriptor returned by shm_open(3) to ltt-sessiond through
  67  * a UNIX socket.
  68  *
  69  * Since we don't need to access the object using its name, we can
  70  * immediately shm_unlink(3) it, and only keep the handle with its file
  71  * descriptor.
  72  */
  73
  74 /*
  75  * Internal structure representing offsets to use at a sub-buffer switch.
  76  */
  77 struct switch_offsets {
  78         unsigned long begin, end, old;
  79         size_t pre_header_padding, size;
  80         unsigned int switch_new_start:1, switch_new_end:1, switch_old_start:1,
  81                      switch_old_end:1;
  82 };
  83
  84 __thread unsigned int lib_ring_buffer_nesting;
  85
  86 /*
  87  * TODO: this is unused. Errors are saved within the ring buffer.
  88  * Eventually, allow consumerd to print these errors.
  89  */
  90 static
  91 void lib_ring_buffer_print_errors(struct channel *chan,
  92                                   struct lttng_ust_lib_ring_buffer *buf, int cpu,
  93                                   struct lttng_ust_shm_handle *handle);
  94
  95 /**
  96  * lib_ring_buffer_reset - Reset ring buffer to initial values.
  97  * @buf: Ring buffer.
  98  *
  99  * Effectively empty the ring buffer. Should be called when the buffer is not
 100  * used for writing. The ring buffer can be opened for reading, but the reader
 101  * should not be using the iterator concurrently with reset. The previous
 102  * current iterator record is reset.
 103  */
 104 void lib_ring_buffer_reset(struct lttng_ust_lib_ring_buffer *buf,
 105                            struct lttng_ust_shm_handle *handle)
 106 {
 107         struct channel *chan = shmp(handle, buf->backend.chan);
 108         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
 109         unsigned int i;
 110
 111         /*
 112          * Reset iterator first. It will put the subbuffer if it currently holds
 113          * it.
 114          */
 115         v_set(config, &buf->offset, 0);
 116         for (i = 0; i < chan->backend.num_subbuf; i++) {
 117                 v_set(config, &shmp_index(handle, buf->commit_hot, i)->cc, 0);
 118                 v_set(config, &shmp_index(handle, buf->commit_hot, i)->seq, 0);
 119                 v_set(config, &shmp_index(handle, buf->commit_cold, i)->cc_sb, 0);
 120         }
 121         uatomic_set(&buf->consumed, 0);
 122         uatomic_set(&buf->record_disabled, 0);
 123         v_set(config, &buf->last_tsc, 0);
 124         lib_ring_buffer_backend_reset(&buf->backend, handle);
 125         /* Don't reset number of active readers */
 126         v_set(config, &buf->records_lost_full, 0);
 127         v_set(config, &buf->records_lost_wrap, 0);
 128         v_set(config, &buf->records_lost_big, 0);
 129         v_set(config, &buf->records_count, 0);
 130         v_set(config, &buf->records_overrun, 0);
 131         buf->finalized = 0;
 132 }
 133
 134 /**
 135  * channel_reset - Reset channel to initial values.
 136  * @chan: Channel.
 137  *
 138  * Effectively empty the channel. Should be called when the channel is not used
 139  * for writing. The channel can be opened for reading, but the reader should not
 140  * be using the iterator concurrently with reset. The previous current iterator
 141  * record is reset.
 142  */
 143 void channel_reset(struct channel *chan)
 144 {
 145         /*
 146          * Reset iterators first. Will put the subbuffer if held for reading.
 147          */
 148         uatomic_set(&chan->record_disabled, 0);
 149         /* Don't reset commit_count_mask, still valid */
 150         channel_backend_reset(&chan->backend);
 151         /* Don't reset switch/read timer interval */
 152         /* Don't reset notifiers and notifier enable bits */
 153         /* Don't reset reader reference count */
 154 }
 155
 156 /*
 157  * Must be called under cpu hotplug protection.
 158  */
 159 int lib_ring_buffer_create(struct lttng_ust_lib_ring_buffer *buf,
 160                            struct channel_backend *chanb, int cpu,
 161                            struct lttng_ust_shm_handle *handle,
 162                            struct shm_object *shmobj)
 163 {
 164         const struct lttng_ust_lib_ring_buffer_config *config = &chanb->config;
 165         struct channel *chan = caa_container_of(chanb, struct channel, backend);
 166         void *priv = channel_get_private(chan);
 167         size_t subbuf_header_size;
 168         u64 tsc;
 169         int ret;
 170
 171         /* Test for cpu hotplug */
 172         if (buf->backend.allocated)
 173                 return 0;
 174
 175         ret = lib_ring_buffer_backend_create(&buf->backend, &chan->backend,
 176                         cpu, handle, shmobj);
 177         if (ret)
 178                 return ret;
 179
 180         align_shm(shmobj, __alignof__(struct commit_counters_hot));
 181         set_shmp(buf->commit_hot,
 182                  zalloc_shm(shmobj,
 183                         sizeof(struct commit_counters_hot) * chan->backend.num_subbuf));
 184         if (!shmp(handle, buf->commit_hot)) {
 185                 ret = -ENOMEM;
 186                 goto free_chanbuf;
 187         }
 188
 189         align_shm(shmobj, __alignof__(struct commit_counters_cold));
 190         set_shmp(buf->commit_cold,
 191                  zalloc_shm(shmobj,
 192                         sizeof(struct commit_counters_cold) * chan->backend.num_subbuf));
 193         if (!shmp(handle, buf->commit_cold)) {
 194                 ret = -ENOMEM;
 195                 goto free_commit;
 196         }
 197
 198         /*
 199          * Write the subbuffer header for first subbuffer so we know the total
 200          * duration of data gathering.
 201          */
 202         subbuf_header_size = config->cb.subbuffer_header_size();
 203         v_set(config, &buf->offset, subbuf_header_size);
 204         subbuffer_id_clear_noref(config, &shmp_index(handle, buf->backend.buf_wsb, 0)->id);
 205         tsc = config->cb.ring_buffer_clock_read(shmp(handle, buf->backend.chan));
 206         config->cb.buffer_begin(buf, tsc, 0, handle);
 207         v_add(config, subbuf_header_size, &shmp_index(handle, buf->commit_hot, 0)->cc);
 208
 209         if (config->cb.buffer_create) {
 210                 ret = config->cb.buffer_create(buf, priv, cpu, chanb->name, handle);
 211                 if (ret)
 212                         goto free_init;
 213         }
 214         buf->backend.allocated = 1;
 215         return 0;
 216
 217         /* Error handling */
 218 free_init:
 219         /* commit_cold will be freed by shm teardown */
 220 free_commit:
 221         /* commit_hot will be freed by shm teardown */
 222 free_chanbuf:
 223         return ret;
 224 }
 225
 226 #if 0
 227 static void switch_buffer_timer(unsigned long data)
 228 {
 229         struct lttng_ust_lib_ring_buffer *buf = (struct lttng_ust_lib_ring_buffer *)data;
 230         struct channel *chan = shmp(handle, buf->backend.chan);
 231         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
 232
 233         /*
 234          * Only flush buffers periodically if readers are active.
 235          */
 236         if (uatomic_read(&buf->active_readers) || uatomic_read(&buf->active_shadow_readers))
 237                 lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE, handle);
 238
 239         //TODO timers
 240         //if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
 241         //      mod_timer_pinned(&buf->switch_timer,
 242         //                       jiffies + chan->switch_timer_interval);
 243         //else
 244         //      mod_timer(&buf->switch_timer,
 245         //                jiffies + chan->switch_timer_interval);
 246 }
 247 #endif //0
 248
 249 static void lib_ring_buffer_start_switch_timer(struct lttng_ust_lib_ring_buffer *buf,
 250                            struct lttng_ust_shm_handle *handle)
 251 {
 252         struct channel *chan = shmp(handle, buf->backend.chan);
 253         //const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
 254
 255         if (!chan->switch_timer_interval || buf->switch_timer_enabled)
 256                 return;
 257         //TODO
 258         //init_timer(&buf->switch_timer);
 259         //buf->switch_timer.function = switch_buffer_timer;
 260         //buf->switch_timer.expires = jiffies + chan->switch_timer_interval;
 261         //buf->switch_timer.data = (unsigned long)buf;
 262         //if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
 263         //      add_timer_on(&buf->switch_timer, buf->backend.cpu);
 264         //else
 265         //      add_timer(&buf->switch_timer);
 266         buf->switch_timer_enabled = 1;
 267 }
 268
 269 static void lib_ring_buffer_stop_switch_timer(struct lttng_ust_lib_ring_buffer *buf,
 270                            struct lttng_ust_shm_handle *handle)
 271 {
 272         struct channel *chan = shmp(handle, buf->backend.chan);
 273
 274         if (!chan->switch_timer_interval || !buf->switch_timer_enabled)
 275                 return;
 276
 277         //TODO
 278         //del_timer_sync(&buf->switch_timer);
 279         buf->switch_timer_enabled = 0;
 280 }
 281
 282 #if 0
 283 /*
 284  * Polling timer to check the channels for data.
 285  */
 286 static void read_buffer_timer(unsigned long data)
 287 {
 288         struct lttng_ust_lib_ring_buffer *buf = (struct lttng_ust_lib_ring_buffer *)data;
 289         struct channel *chan = shmp(handle, buf->backend.chan);
 290         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
 291
 292         CHAN_WARN_ON(chan, !buf->backend.allocated);
 293
 294         if (uatomic_read(&buf->active_readers) || uatomic_read(&buf->active_shadow_readers))
 295             && lib_ring_buffer_poll_deliver(config, buf, chan)) {
 296                 //TODO
 297                 //wake_up_interruptible(&buf->read_wait);
 298                 //wake_up_interruptible(&chan->read_wait);
 299         }
 300
 301         //TODO
 302         //if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
 303         //      mod_timer_pinned(&buf->read_timer,
 304         //                       jiffies + chan->read_timer_interval);
 305         //else
 306         //      mod_timer(&buf->read_timer,
 307         //                jiffies + chan->read_timer_interval);
 308 }
 309 #endif //0
 310
 311 static void lib_ring_buffer_start_read_timer(struct lttng_ust_lib_ring_buffer *buf,
 312                            struct lttng_ust_shm_handle *handle)
 313 {
 314         struct channel *chan = shmp(handle, buf->backend.chan);
 315         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
 316
 317         if (config->wakeup != RING_BUFFER_WAKEUP_BY_TIMER
 318             || !chan->read_timer_interval
 319             || buf->read_timer_enabled)
 320                 return;
 321
 322         //TODO
 323         //init_timer(&buf->read_timer);
 324         //buf->read_timer.function = read_buffer_timer;
 325         //buf->read_timer.expires = jiffies + chan->read_timer_interval;
 326         //buf->read_timer.data = (unsigned long)buf;
 327
 328         //if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
 329         //      add_timer_on(&buf->read_timer, buf->backend.cpu);
 330         //else
 331         //      add_timer(&buf->read_timer);
 332         buf->read_timer_enabled = 1;
 333 }
 334
 335 static void lib_ring_buffer_stop_read_timer(struct lttng_ust_lib_ring_buffer *buf,
 336                            struct lttng_ust_shm_handle *handle)
 337 {
 338         struct channel *chan = shmp(handle, buf->backend.chan);
 339         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
 340
 341         if (config->wakeup != RING_BUFFER_WAKEUP_BY_TIMER
 342             || !chan->read_timer_interval
 343             || !buf->read_timer_enabled)
 344                 return;
 345
 346         //TODO
 347         //del_timer_sync(&buf->read_timer);
 348         /*
 349          * do one more check to catch data that has been written in the last
 350          * timer period.
 351          */
 352         if (lib_ring_buffer_poll_deliver(config, buf, chan, handle)) {
 353                 //TODO
 354                 //wake_up_interruptible(&buf->read_wait);
 355                 //wake_up_interruptible(&chan->read_wait);
 356         }
 357         buf->read_timer_enabled = 0;
 358 }
 359
 360 static void channel_unregister_notifiers(struct channel *chan,
 361                            struct lttng_ust_shm_handle *handle)
 362 {
 363         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
 364         int cpu;
 365
 366         if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
 367                 for_each_possible_cpu(cpu) {
 368                         struct lttng_ust_lib_ring_buffer *buf = shmp(handle, chan->backend.buf[cpu].shmp);
 369
 370                         lib_ring_buffer_stop_switch_timer(buf, handle);
 371                         lib_ring_buffer_stop_read_timer(buf, handle);
 372                 }
 373         } else {
 374                 struct lttng_ust_lib_ring_buffer *buf = shmp(handle, chan->backend.buf[0].shmp);
 375
 376                 lib_ring_buffer_stop_switch_timer(buf, handle);
 377                 lib_ring_buffer_stop_read_timer(buf, handle);
 378         }
 379         //channel_backend_unregister_notifiers(&chan->backend);
 380 }
 381
 382 static void channel_free(struct channel *chan, struct lttng_ust_shm_handle *handle,
 383                 int shadow)
 384 {
 385         if (!shadow)
 386                 channel_backend_free(&chan->backend, handle);
 387         /* chan is freed by shm teardown */
 388         shm_object_table_destroy(handle->table);
 389         free(handle);
 390 }
 391
 392 /**
 393  * channel_create - Create channel.
 394  * @config: ring buffer instance configuration
 395  * @name: name of the channel
 396  * @priv_data: ring buffer client private data area pointer (output)
 397  * @priv_data_size: length, in bytes, of the private data area.
 398  * @priv_data_init: initialization data for private data.
 399  * @buf_addr: pointer the the beginning of the preallocated buffer contiguous
 400  *            address mapping. It is used only by RING_BUFFER_STATIC
 401  *            configuration. It can be set to NULL for other backends.
 402  * @subbuf_size: subbuffer size
 403  * @num_subbuf: number of subbuffers
 404  * @switch_timer_interval: Time interval (in us) to fill sub-buffers with
 405  *                         padding to let readers get those sub-buffers.
 406  *                         Used for live streaming.
 407  * @read_timer_interval: Time interval (in us) to wake up pending readers.
 408  *
 409  * Holds cpu hotplug.
 410  * Returns NULL on failure.
 411  */
 412 struct lttng_ust_shm_handle *channel_create(const struct lttng_ust_lib_ring_buffer_config *config,
 413                    const char *name,
 414                    void **priv_data,
 415                    size_t priv_data_align,
 416                    size_t priv_data_size,
 417                    void *priv_data_init,
 418                    void *buf_addr, size_t subbuf_size,
 419                    size_t num_subbuf, unsigned int switch_timer_interval,
 420                    unsigned int read_timer_interval,
 421                    int *shm_fd, int *wait_fd, uint64_t *memory_map_size)
 422 {
 423         int ret, cpu;
 424         size_t shmsize, chansize;
 425         struct channel *chan;
 426         struct lttng_ust_shm_handle *handle;
 427         struct shm_object *shmobj;
 428         struct shm_ref *ref;
 429
 430         if (lib_ring_buffer_check_config(config, switch_timer_interval,
 431                                          read_timer_interval))
 432                 return NULL;
 433
 434         handle = zmalloc(sizeof(struct lttng_ust_shm_handle));
 435         if (!handle)
 436                 return NULL;
 437
 438         /* Allocate table for channel + per-cpu buffers */
 439         handle->table = shm_object_table_create(1 + num_possible_cpus());
 440         if (!handle->table)
 441                 goto error_table_alloc;
 442
 443         /* Calculate the shm allocation layout */
 444         shmsize = sizeof(struct channel);
 445         shmsize += offset_align(shmsize, __alignof__(struct lttng_ust_lib_ring_buffer_shmp));
 446         if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
 447                 shmsize += sizeof(struct lttng_ust_lib_ring_buffer_shmp) * num_possible_cpus();
 448         else
 449                 shmsize += sizeof(struct lttng_ust_lib_ring_buffer_shmp);
 450         chansize = shmsize;
 451         shmsize += offset_align(shmsize, priv_data_align);
 452         shmsize += priv_data_size;
 453
 454         shmobj = shm_object_table_append(handle->table, shmsize);
 455         if (!shmobj)
 456                 goto error_append;
 457         /* struct channel is at object 0, offset 0 (hardcoded) */
 458         set_shmp(handle->chan, zalloc_shm(shmobj, chansize));
 459         assert(handle->chan._ref.index == 0);
 460         assert(handle->chan._ref.offset == 0);
 461         chan = shmp(handle, handle->chan);
 462         if (!chan)
 463                 goto error_append;
 464
 465         /* space for private data */
 466         if (priv_data_size) {
 467                 DECLARE_SHMP(void, priv_data_alloc);
 468
 469                 align_shm(shmobj, priv_data_align);
 470                 chan->priv_data_offset = shmobj->allocated_len;
 471                 set_shmp(priv_data_alloc, zalloc_shm(shmobj, priv_data_size));
 472                 if (!shmp(handle, priv_data_alloc))
 473                         goto error_append;
 474                 *priv_data = channel_get_private(chan);
 475                 memcpy(*priv_data, priv_data_init, priv_data_size);
 476         } else {
 477                 chan->priv_data_offset = -1;
 478                 *priv_data = NULL;
 479         }
 480
 481         ret = channel_backend_init(&chan->backend, name, config,
 482                                    subbuf_size, num_subbuf, handle);
 483         if (ret)
 484                 goto error_backend_init;
 485
 486         chan->commit_count_mask = (~0UL >> chan->backend.num_subbuf_order);
 487         //TODO
 488         //chan->switch_timer_interval = usecs_to_jiffies(switch_timer_interval);
 489         //chan->read_timer_interval = usecs_to_jiffies(read_timer_interval);
 490         //TODO
 491         //init_waitqueue_head(&chan->read_wait);
 492         //init_waitqueue_head(&chan->hp_wait);
 493
 494         if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
 495                 /*
 496                  * In case of non-hotplug cpu, if the ring-buffer is allocated
 497                  * in early initcall, it will not be notified of secondary cpus.
 498                  * In that off case, we need to allocate for all possible cpus.
 499                  */
 500                 for_each_possible_cpu(cpu) {
 501                         struct lttng_ust_lib_ring_buffer *buf = shmp(handle, chan->backend.buf[cpu].shmp);
 502                         lib_ring_buffer_start_switch_timer(buf, handle);
 503                         lib_ring_buffer_start_read_timer(buf, handle);
 504                 }
 505         } else {
 506                 struct lttng_ust_lib_ring_buffer *buf = shmp(handle, chan->backend.buf[0].shmp);
 507
 508                 lib_ring_buffer_start_switch_timer(buf, handle);
 509                 lib_ring_buffer_start_read_timer(buf, handle);
 510         }
 511         ref = &handle->chan._ref;
 512         shm_get_object_data(handle, ref, shm_fd, wait_fd, memory_map_size);
 513         return handle;
 514
 515 error_backend_init:
 516 error_append:
 517         shm_object_table_destroy(handle->table);
 518 error_table_alloc:
 519         free(handle);
 520         return NULL;
 521 }
 522
 523 struct lttng_ust_shm_handle *channel_handle_create(int shm_fd, int wait_fd,
 524                                         uint64_t memory_map_size)
 525 {
 526         struct lttng_ust_shm_handle *handle;
 527         struct shm_object *object;
 528
 529         handle = zmalloc(sizeof(struct lttng_ust_shm_handle));
 530         if (!handle)
 531                 return NULL;
 532
 533         /* Allocate table for channel + per-cpu buffers */
 534         handle->table = shm_object_table_create(1 + num_possible_cpus());
 535         if (!handle->table)
 536                 goto error_table_alloc;
 537         /* Add channel object */
 538         object = shm_object_table_append_shadow(handle->table,
 539                         shm_fd, wait_fd, memory_map_size);
 540         if (!object)
 541                 goto error_table_object;
 542         /* struct channel is at object 0, offset 0 (hardcoded) */
 543         handle->chan._ref.index = 0;
 544         handle->chan._ref.offset = 0;
 545         return handle;
 546
 547 error_table_object:
 548         shm_object_table_destroy(handle->table);
 549 error_table_alloc:
 550         free(handle);
 551         return NULL;
 552 }
 553
 554 int channel_handle_add_stream(struct lttng_ust_shm_handle *handle,
 555                 int shm_fd, int wait_fd, uint64_t memory_map_size)
 556 {
 557         struct shm_object *object;
 558
 559         /* Add stream object */
 560         object = shm_object_table_append_shadow(handle->table,
 561                         shm_fd, wait_fd, memory_map_size);
 562         if (!object)
 563                 return -1;
 564         return 0;
 565 }
 566
 567 static
 568 void channel_release(struct channel *chan, struct lttng_ust_shm_handle *handle,
 569                 int shadow)
 570 {
 571         channel_free(chan, handle, shadow);
 572 }
 573
 574 /**
 575  * channel_destroy - Finalize, wait for q.s. and destroy channel.
 576  * @chan: channel to destroy
 577  *
 578  * Holds cpu hotplug.
 579  * Call "destroy" callback, finalize channels, decrement the channel
 580  * reference count. Note that when readers have completed data
 581  * consumption of finalized channels, get_subbuf() will return -ENODATA.
 582  * They should release their handle at that point.
 583  */
 584 void channel_destroy(struct channel *chan, struct lttng_ust_shm_handle *handle,
 585                 int shadow)
 586 {
 587         if (shadow) {
 588                 channel_release(chan, handle, shadow);
 589                 return;
 590         }
 591
 592         channel_unregister_notifiers(chan, handle);
 593
 594         /*
 595          * Note: the consumer takes care of finalizing and switching the
 596          * buffers.
 597          */
 598
 599         /*
 600          * sessiond/consumer are keeping a reference on the shm file
 601          * descriptor directly. No need to refcount.
 602          */
 603         channel_release(chan, handle, shadow);
 604         return;
 605 }
 606
 607 struct lttng_ust_lib_ring_buffer *channel_get_ring_buffer(
 608                                         const struct lttng_ust_lib_ring_buffer_config *config,
 609                                         struct channel *chan, int cpu,
 610                                         struct lttng_ust_shm_handle *handle,
 611                                         int *shm_fd, int *wait_fd,
 612                                         uint64_t *memory_map_size)
 613 {
 614         struct shm_ref *ref;
 615
 616         if (config->alloc == RING_BUFFER_ALLOC_GLOBAL) {
 617                 ref = &chan->backend.buf[0].shmp._ref;
 618                 shm_get_object_data(handle, ref, shm_fd, wait_fd,
 619                         memory_map_size);
 620                 return shmp(handle, chan->backend.buf[0].shmp);
 621         } else {
 622                 if (cpu >= num_possible_cpus())
 623                         return NULL;
 624                 ref = &chan->backend.buf[cpu].shmp._ref;
 625                 shm_get_object_data(handle, ref, shm_fd, wait_fd,
 626                         memory_map_size);
 627                 return shmp(handle, chan->backend.buf[cpu].shmp);
 628         }
 629 }
 630
 631 int lib_ring_buffer_open_read(struct lttng_ust_lib_ring_buffer *buf,
 632                               struct lttng_ust_shm_handle *handle,
 633                               int shadow)
 634 {
 635         if (shadow) {
 636                 if (uatomic_cmpxchg(&buf->active_shadow_readers, 0, 1) != 0)
 637                         return -EBUSY;
 638                 cmm_smp_mb();
 639                 return 0;
 640         }
 641         if (uatomic_cmpxchg(&buf->active_readers, 0, 1) != 0)
 642                 return -EBUSY;
 643         cmm_smp_mb();
 644         return 0;
 645 }
 646
 647 void lib_ring_buffer_release_read(struct lttng_ust_lib_ring_buffer *buf,
 648                                   struct lttng_ust_shm_handle *handle,
 649                                   int shadow)
 650 {
 651         struct channel *chan = shmp(handle, buf->backend.chan);
 652
 653         if (shadow) {
 654                 CHAN_WARN_ON(chan, uatomic_read(&buf->active_shadow_readers) != 1);
 655                 cmm_smp_mb();
 656                 uatomic_dec(&buf->active_shadow_readers);
 657                 return;
 658         }
 659         CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1);
 660         cmm_smp_mb();
 661         uatomic_dec(&buf->active_readers);
 662 }
 663
 664 /**
 665  * lib_ring_buffer_snapshot - save subbuffer position snapshot (for read)
 666  * @buf: ring buffer
 667  * @consumed: consumed count indicating the position where to read
 668  * @produced: produced count, indicates position when to stop reading
 669  *
 670  * Returns -ENODATA if buffer is finalized, -EAGAIN if there is currently no
 671  * data to read at consumed position, or 0 if the get operation succeeds.
 672  */
 673
 674 int lib_ring_buffer_snapshot(struct lttng_ust_lib_ring_buffer *buf,
 675                              unsigned long *consumed, unsigned long *produced,
 676                              struct lttng_ust_shm_handle *handle)
 677 {
 678         struct channel *chan = shmp(handle, buf->backend.chan);
 679         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
 680         unsigned long consumed_cur, write_offset;
 681         int finalized;
 682
 683         finalized = CMM_ACCESS_ONCE(buf->finalized);
 684         /*
 685          * Read finalized before counters.
 686          */
 687         cmm_smp_rmb();
 688         consumed_cur = uatomic_read(&buf->consumed);
 689         /*
 690          * No need to issue a memory barrier between consumed count read and
 691          * write offset read, because consumed count can only change
 692          * concurrently in overwrite mode, and we keep a sequence counter
 693          * identifier derived from the write offset to check we are getting
 694          * the same sub-buffer we are expecting (the sub-buffers are atomically
 695          * "tagged" upon writes, tags are checked upon read).
 696          */
 697         write_offset = v_read(config, &buf->offset);
 698
 699         /*
 700          * Check that we are not about to read the same subbuffer in
 701          * which the writer head is.
 702          */
 703         if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed_cur, chan)
 704             == 0)
 705                 goto nodata;
 706
 707         *consumed = consumed_cur;
 708         *produced = subbuf_trunc(write_offset, chan);
 709
 710         return 0;
 711
 712 nodata:
 713         /*
 714          * The memory barriers __wait_event()/wake_up_interruptible() take care
 715          * of "raw_spin_is_locked" memory ordering.
 716          */
 717         if (finalized)
 718                 return -ENODATA;
 719         else
 720                 return -EAGAIN;
 721 }
 722
 723 /**
 724  * lib_ring_buffer_put_snapshot - move consumed counter forward
 725  * @buf: ring buffer
 726  * @consumed_new: new consumed count value
 727  */
 728 void lib_ring_buffer_move_consumer(struct lttng_ust_lib_ring_buffer *buf,
 729                                    unsigned long consumed_new,
 730                                    struct lttng_ust_shm_handle *handle)
 731 {
 732         struct lttng_ust_lib_ring_buffer_backend *bufb = &buf->backend;
 733         struct channel *chan = shmp(handle, bufb->chan);
 734         unsigned long consumed;
 735
 736         CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1
 737                         && uatomic_read(&buf->active_shadow_readers) != 1);
 738
 739         /*
 740          * Only push the consumed value forward.
 741          * If the consumed cmpxchg fails, this is because we have been pushed by
 742          * the writer in flight recorder mode.
 743          */
 744         consumed = uatomic_read(&buf->consumed);
 745         while ((long) consumed - (long) consumed_new < 0)
 746                 consumed = uatomic_cmpxchg(&buf->consumed, consumed,
 747                                            consumed_new);
 748 }
 749
 750 /**
 751  * lib_ring_buffer_get_subbuf - get exclusive access to subbuffer for reading
 752  * @buf: ring buffer
 753  * @consumed: consumed count indicating the position where to read
 754  *
 755  * Returns -ENODATA if buffer is finalized, -EAGAIN if there is currently no
 756  * data to read at consumed position, or 0 if the get operation succeeds.
 757  */
 758 int lib_ring_buffer_get_subbuf(struct lttng_ust_lib_ring_buffer *buf,
 759                                unsigned long consumed,
 760                                struct lttng_ust_shm_handle *handle)
 761 {
 762         struct channel *chan = shmp(handle, buf->backend.chan);
 763         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
 764         unsigned long consumed_cur, consumed_idx, commit_count, write_offset;
 765         int ret;
 766         int finalized;
 767
 768 retry:
 769         finalized = CMM_ACCESS_ONCE(buf->finalized);
 770         /*
 771          * Read finalized before counters.
 772          */
 773         cmm_smp_rmb();
 774         consumed_cur = uatomic_read(&buf->consumed);
 775         consumed_idx = subbuf_index(consumed, chan);
 776         commit_count = v_read(config, &shmp_index(handle, buf->commit_cold, consumed_idx)->cc_sb);
 777         /*
 778          * Make sure we read the commit count before reading the buffer
 779          * data and the write offset. Correct consumed offset ordering
 780          * wrt commit count is insured by the use of cmpxchg to update
 781          * the consumed offset.
 782          */
 783         /*
 784          * Local rmb to match the remote wmb to read the commit count
 785          * before the buffer data and the write offset.
 786          */
 787         cmm_smp_rmb();
 788
 789         write_offset = v_read(config, &buf->offset);
 790
 791         /*
 792          * Check that the buffer we are getting is after or at consumed_cur
 793          * position.
 794          */
 795         if ((long) subbuf_trunc(consumed, chan)
 796             - (long) subbuf_trunc(consumed_cur, chan) < 0)
 797                 goto nodata;
 798
 799         /*
 800          * Check that the subbuffer we are trying to consume has been
 801          * already fully committed.
 802          */
 803         if (((commit_count - chan->backend.subbuf_size)
 804              & chan->commit_count_mask)
 805             - (buf_trunc(consumed_cur, chan)
 806                >> chan->backend.num_subbuf_order)
 807             != 0)
 808                 goto nodata;
 809
 810         /*
 811          * Check that we are not about to read the same subbuffer in
 812          * which the writer head is.
 813          */
 814         if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed_cur, chan)
 815             == 0)
 816                 goto nodata;
 817
 818         /*
 819          * Failure to get the subbuffer causes a busy-loop retry without going
 820          * to a wait queue. These are caused by short-lived race windows where
 821          * the writer is getting access to a subbuffer we were trying to get
 822          * access to. Also checks that the "consumed" buffer count we are
 823          * looking for matches the one contained in the subbuffer id.
 824          */
 825         ret = update_read_sb_index(config, &buf->backend, &chan->backend,
 826                                    consumed_idx, buf_trunc_val(consumed, chan),
 827                                    handle);
 828         if (ret)
 829                 goto retry;
 830         subbuffer_id_clear_noref(config, &buf->backend.buf_rsb.id);
 831
 832         buf->get_subbuf_consumed = consumed;
 833         buf->get_subbuf = 1;
 834
 835         return 0;
 836
 837 nodata:
 838         /*
 839          * The memory barriers __wait_event()/wake_up_interruptible() take care
 840          * of "raw_spin_is_locked" memory ordering.
 841          */
 842         if (finalized)
 843                 return -ENODATA;
 844         else
 845                 return -EAGAIN;
 846 }
 847
 848 /**
 849  * lib_ring_buffer_put_subbuf - release exclusive subbuffer access
 850  * @buf: ring buffer
 851  */
 852 void lib_ring_buffer_put_subbuf(struct lttng_ust_lib_ring_buffer *buf,
 853                                 struct lttng_ust_shm_handle *handle)
 854 {
 855         struct lttng_ust_lib_ring_buffer_backend *bufb = &buf->backend;
 856         struct channel *chan = shmp(handle, bufb->chan);
 857         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
 858         unsigned long read_sb_bindex, consumed_idx, consumed;
 859
 860         CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1
 861                         && uatomic_read(&buf->active_shadow_readers) != 1);
 862
 863         if (!buf->get_subbuf) {
 864                 /*
 865                  * Reader puts a subbuffer it did not get.
 866                  */
 867                 CHAN_WARN_ON(chan, 1);
 868                 return;
 869         }
 870         consumed = buf->get_subbuf_consumed;
 871         buf->get_subbuf = 0;
 872
 873         /*
 874          * Clear the records_unread counter. (overruns counter)
 875          * Can still be non-zero if a file reader simply grabbed the data
 876          * without using iterators.
 877          * Can be below zero if an iterator is used on a snapshot more than
 878          * once.
 879          */
 880         read_sb_bindex = subbuffer_id_get_index(config, bufb->buf_rsb.id);
 881         v_add(config, v_read(config,
 882                              &shmp(handle, shmp_index(handle, bufb->array, read_sb_bindex)->shmp)->records_unread),
 883               &bufb->records_read);
 884         v_set(config, &shmp(handle, shmp_index(handle, bufb->array, read_sb_bindex)->shmp)->records_unread, 0);
 885         CHAN_WARN_ON(chan, config->mode == RING_BUFFER_OVERWRITE
 886                      && subbuffer_id_is_noref(config, bufb->buf_rsb.id));
 887         subbuffer_id_set_noref(config, &bufb->buf_rsb.id);
 888
 889         /*
 890          * Exchange the reader subbuffer with the one we put in its place in the
 891          * writer subbuffer table. Expect the original consumed count. If
 892          * update_read_sb_index fails, this is because the writer updated the
 893          * subbuffer concurrently. We should therefore keep the subbuffer we
 894          * currently have: it has become invalid to try reading this sub-buffer
 895          * consumed count value anyway.
 896          */
 897         consumed_idx = subbuf_index(consumed, chan);
 898         update_read_sb_index(config, &buf->backend, &chan->backend,
 899                              consumed_idx, buf_trunc_val(consumed, chan),
 900                              handle);
 901         /*
 902          * update_read_sb_index return value ignored. Don't exchange sub-buffer
 903          * if the writer concurrently updated it.
 904          */
 905 }
 906
 907 /*
 908  * cons_offset is an iterator on all subbuffer offsets between the reader
 909  * position and the writer position. (inclusive)
 910  */
 911 static
 912 void lib_ring_buffer_print_subbuffer_errors(struct lttng_ust_lib_ring_buffer *buf,
 913                                             struct channel *chan,
 914                                             unsigned long cons_offset,
 915                                             int cpu,
 916                                             struct lttng_ust_shm_handle *handle)
 917 {
 918         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
 919         unsigned long cons_idx, commit_count, commit_count_sb;
 920
 921         cons_idx = subbuf_index(cons_offset, chan);
 922         commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, cons_idx)->cc);
 923         commit_count_sb = v_read(config, &shmp_index(handle, buf->commit_cold, cons_idx)->cc_sb);
 924
 925         if (subbuf_offset(commit_count, chan) != 0)
 926                 DBG("ring buffer %s, cpu %d: "
 927                        "commit count in subbuffer %lu,\n"
 928                        "expecting multiples of %lu bytes\n"
 929                        "  [ %lu bytes committed, %lu bytes reader-visible ]\n",
 930                        chan->backend.name, cpu, cons_idx,
 931                        chan->backend.subbuf_size,
 932                        commit_count, commit_count_sb);
 933
 934         DBG("ring buffer: %s, cpu %d: %lu bytes committed\n",
 935                chan->backend.name, cpu, commit_count);
 936 }
 937
 938 static
 939 void lib_ring_buffer_print_buffer_errors(struct lttng_ust_lib_ring_buffer *buf,
 940                                          struct channel *chan,
 941                                          void *priv, int cpu,
 942                                          struct lttng_ust_shm_handle *handle)
 943 {
 944         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
 945         unsigned long write_offset, cons_offset;
 946
 947         /*
 948          * Can be called in the error path of allocation when
 949          * trans_channel_data is not yet set.
 950          */
 951         if (!chan)
 952                 return;
 953         /*
 954          * No need to order commit_count, write_offset and cons_offset reads
 955          * because we execute at teardown when no more writer nor reader
 956          * references are left.
 957          */
 958         write_offset = v_read(config, &buf->offset);
 959         cons_offset = uatomic_read(&buf->consumed);
 960         if (write_offset != cons_offset)
 961                 DBG("ring buffer %s, cpu %d: "
 962                        "non-consumed data\n"
 963                        "  [ %lu bytes written, %lu bytes read ]\n",
 964                        chan->backend.name, cpu, write_offset, cons_offset);
 965
 966         for (cons_offset = uatomic_read(&buf->consumed);
 967              (long) (subbuf_trunc((unsigned long) v_read(config, &buf->offset),
 968                                   chan)
 969                      - cons_offset) > 0;
 970              cons_offset = subbuf_align(cons_offset, chan))
 971                 lib_ring_buffer_print_subbuffer_errors(buf, chan, cons_offset,
 972                                                        cpu, handle);
 973 }
 974
 975 static
 976 void lib_ring_buffer_print_errors(struct channel *chan,
 977                                   struct lttng_ust_lib_ring_buffer *buf, int cpu,
 978                                   struct lttng_ust_shm_handle *handle)
 979 {
 980         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
 981         void *priv = channel_get_private(chan);
 982
 983         DBG("ring buffer %s, cpu %d: %lu records written, "
 984                           "%lu records overrun\n",
 985                           chan->backend.name, cpu,
 986                           v_read(config, &buf->records_count),
 987                           v_read(config, &buf->records_overrun));
 988
 989         if (v_read(config, &buf->records_lost_full)
 990             || v_read(config, &buf->records_lost_wrap)
 991             || v_read(config, &buf->records_lost_big))
 992                 DBG("ring buffer %s, cpu %d: records were lost. Caused by:\n"
 993                        "  [ %lu buffer full, %lu nest buffer wrap-around, "
 994                        "%lu event too big ]\n",
 995                        chan->backend.name, cpu,
 996                        v_read(config, &buf->records_lost_full),
 997                        v_read(config, &buf->records_lost_wrap),
 998                        v_read(config, &buf->records_lost_big));
 999
1000         lib_ring_buffer_print_buffer_errors(buf, chan, priv, cpu, handle);
1001 }
1002
1003 /*
1004  * lib_ring_buffer_switch_old_start: Populate old subbuffer header.
1005  *
1006  * Only executed when the buffer is finalized, in SWITCH_FLUSH.
1007  */
1008 static
1009 void lib_ring_buffer_switch_old_start(struct lttng_ust_lib_ring_buffer *buf,
1010                                       struct channel *chan,
1011                                       struct switch_offsets *offsets,
1012                                       u64 tsc,
1013                                       struct lttng_ust_shm_handle *handle)
1014 {
1015         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
1016         unsigned long oldidx = subbuf_index(offsets->old, chan);
1017         unsigned long commit_count;
1018
1019         config->cb.buffer_begin(buf, tsc, oldidx, handle);
1020
1021         /*
1022          * Order all writes to buffer before the commit count update that will
1023          * determine that the subbuffer is full.
1024          */
1025         cmm_smp_wmb();
1026         v_add(config, config->cb.subbuffer_header_size(),
1027               &shmp_index(handle, buf->commit_hot, oldidx)->cc);
1028         commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, oldidx)->cc);
1029         /* Check if the written buffer has to be delivered */
1030         lib_ring_buffer_check_deliver(config, buf, chan, offsets->old,
1031                                       commit_count, oldidx, handle);
1032         lib_ring_buffer_write_commit_counter(config, buf, chan, oldidx,
1033                                              offsets->old, commit_count,
1034                                              config->cb.subbuffer_header_size(),
1035                                              handle);
1036 }
1037
1038 /*
1039  * lib_ring_buffer_switch_old_end: switch old subbuffer
1040  *
1041  * Note : offset_old should never be 0 here. It is ok, because we never perform
1042  * buffer switch on an empty subbuffer in SWITCH_ACTIVE mode. The caller
1043  * increments the offset_old value when doing a SWITCH_FLUSH on an empty
1044  * subbuffer.
1045  */
1046 static
1047 void lib_ring_buffer_switch_old_end(struct lttng_ust_lib_ring_buffer *buf,
1048                                     struct channel *chan,
1049                                     struct switch_offsets *offsets,
1050                                     u64 tsc,
1051                                     struct lttng_ust_shm_handle *handle)
1052 {
1053         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
1054         unsigned long oldidx = subbuf_index(offsets->old - 1, chan);
1055         unsigned long commit_count, padding_size, data_size;
1056
1057         data_size = subbuf_offset(offsets->old - 1, chan) + 1;
1058         padding_size = chan->backend.subbuf_size - data_size;
1059         subbuffer_set_data_size(config, &buf->backend, oldidx, data_size,
1060                                 handle);
1061
1062         /*
1063          * Order all writes to buffer before the commit count update that will
1064          * determine that the subbuffer is full.
1065          */
1066         cmm_smp_wmb();
1067         v_add(config, padding_size, &shmp_index(handle, buf->commit_hot, oldidx)->cc);
1068         commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, oldidx)->cc);
1069         lib_ring_buffer_check_deliver(config, buf, chan, offsets->old - 1,
1070                                       commit_count, oldidx, handle);
1071         lib_ring_buffer_write_commit_counter(config, buf, chan, oldidx,
1072                                              offsets->old, commit_count,
1073                                              padding_size, handle);
1074 }
1075
1076 /*
1077  * lib_ring_buffer_switch_new_start: Populate new subbuffer.
1078  *
1079  * This code can be executed unordered : writers may already have written to the
1080  * sub-buffer before this code gets executed, caution.  The commit makes sure
1081  * that this code is executed before the deliver of this sub-buffer.
1082  */
1083 static
1084 void lib_ring_buffer_switch_new_start(struct lttng_ust_lib_ring_buffer *buf,
1085                                       struct channel *chan,
1086                                       struct switch_offsets *offsets,
1087                                       u64 tsc,
1088                                       struct lttng_ust_shm_handle *handle)
1089 {
1090         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
1091         unsigned long beginidx = subbuf_index(offsets->begin, chan);
1092         unsigned long commit_count;
1093
1094         config->cb.buffer_begin(buf, tsc, beginidx, handle);
1095
1096         /*
1097          * Order all writes to buffer before the commit count update that will
1098          * determine that the subbuffer is full.
1099          */
1100         cmm_smp_wmb();
1101         v_add(config, config->cb.subbuffer_header_size(),
1102               &shmp_index(handle, buf->commit_hot, beginidx)->cc);
1103         commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, beginidx)->cc);
1104         /* Check if the written buffer has to be delivered */
1105         lib_ring_buffer_check_deliver(config, buf, chan, offsets->begin,
1106                                       commit_count, beginidx, handle);
1107         lib_ring_buffer_write_commit_counter(config, buf, chan, beginidx,
1108                                              offsets->begin, commit_count,
1109                                              config->cb.subbuffer_header_size(),
1110                                              handle);
1111 }
1112
1113 /*
1114  * lib_ring_buffer_switch_new_end: finish switching current subbuffer
1115  *
1116  * The only remaining threads could be the ones with pending commits. They will
1117  * have to do the deliver themselves.
1118  */
1119 static
1120 void lib_ring_buffer_switch_new_end(struct lttng_ust_lib_ring_buffer *buf,
1121                                     struct channel *chan,
1122                                     struct switch_offsets *offsets,
1123                                     u64 tsc,
1124                                     struct lttng_ust_shm_handle *handle)
1125 {
1126         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
1127         unsigned long endidx = subbuf_index(offsets->end - 1, chan);
1128         unsigned long commit_count, padding_size, data_size;
1129
1130         data_size = subbuf_offset(offsets->end - 1, chan) + 1;
1131         padding_size = chan->backend.subbuf_size - data_size;
1132         subbuffer_set_data_size(config, &buf->backend, endidx, data_size,
1133                                 handle);
1134
1135         /*
1136          * Order all writes to buffer before the commit count update that will
1137          * determine that the subbuffer is full.
1138          */
1139         cmm_smp_wmb();
1140         v_add(config, padding_size, &shmp_index(handle, buf->commit_hot, endidx)->cc);
1141         commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, endidx)->cc);
1142         lib_ring_buffer_check_deliver(config, buf, chan, offsets->end - 1,
1143                                   commit_count, endidx, handle);
1144         lib_ring_buffer_write_commit_counter(config, buf, chan, endidx,
1145                                              offsets->end, commit_count,
1146                                              padding_size, handle);
1147 }
1148
1149 /*
1150  * Returns :
1151  * 0 if ok
1152  * !0 if execution must be aborted.
1153  */
1154 static
1155 int lib_ring_buffer_try_switch_slow(enum switch_mode mode,
1156                                     struct lttng_ust_lib_ring_buffer *buf,
1157                                     struct channel *chan,
1158                                     struct switch_offsets *offsets,
1159                                     u64 *tsc)
1160 {
1161         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
1162         unsigned long off;
1163
1164         offsets->begin = v_read(config, &buf->offset);
1165         offsets->old = offsets->begin;
1166         offsets->switch_old_start = 0;
1167         off = subbuf_offset(offsets->begin, chan);
1168
1169         *tsc = config->cb.ring_buffer_clock_read(chan);
1170
1171         /*
1172          * Ensure we flush the header of an empty subbuffer when doing the
1173          * finalize (SWITCH_FLUSH). This ensures that we end up knowing the
1174          * total data gathering duration even if there were no records saved
1175          * after the last buffer switch.
1176          * In SWITCH_ACTIVE mode, switch the buffer when it contains events.
1177          * SWITCH_ACTIVE only flushes the current subbuffer, dealing with end of
1178          * subbuffer header as appropriate.
1179          * The next record that reserves space will be responsible for
1180          * populating the following subbuffer header. We choose not to populate
1181          * the next subbuffer header here because we want to be able to use
1182          * SWITCH_ACTIVE for periodical buffer flush, which must
1183          * guarantee that all the buffer content (records and header
1184          * timestamps) are visible to the reader. This is required for
1185          * quiescence guarantees for the fusion merge.
1186          */
1187         if (mode == SWITCH_FLUSH || off > 0) {
1188                 if (caa_unlikely(off == 0)) {
1189                         /*
1190                          * The client does not save any header information.
1191                          * Don't switch empty subbuffer on finalize, because it
1192                          * is invalid to deliver a completely empty subbuffer.
1193                          */
1194                         if (!config->cb.subbuffer_header_size())
1195                                 return -1;
1196                         /*
1197                          * Need to write the subbuffer start header on finalize.
1198                          */
1199                         offsets->switch_old_start = 1;
1200                 }
1201                 offsets->begin = subbuf_align(offsets->begin, chan);
1202         } else
1203                 return -1;      /* we do not have to switch : buffer is empty */
1204         /* Note: old points to the next subbuf at offset 0 */
1205         offsets->end = offsets->begin;
1206         return 0;
1207 }
1208
1209 /*
1210  * Force a sub-buffer switch. This operation is completely reentrant : can be
1211  * called while tracing is active with absolutely no lock held.
1212  *
1213  * Note, however, that as a v_cmpxchg is used for some atomic
1214  * operations, this function must be called from the CPU which owns the buffer
1215  * for a ACTIVE flush.
1216  */
1217 void lib_ring_buffer_switch_slow(struct lttng_ust_lib_ring_buffer *buf, enum switch_mode mode,
1218                                  struct lttng_ust_shm_handle *handle)
1219 {
1220         struct channel *chan = shmp(handle, buf->backend.chan);
1221         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
1222         struct switch_offsets offsets;
1223         unsigned long oldidx;
1224         u64 tsc;
1225
1226         offsets.size = 0;
1227
1228         /*
1229          * Perform retryable operations.
1230          */
1231         do {
1232                 if (lib_ring_buffer_try_switch_slow(mode, buf, chan, &offsets,
1233                                                     &tsc))
1234                         return; /* Switch not needed */
1235         } while (v_cmpxchg(config, &buf->offset, offsets.old, offsets.end)
1236                  != offsets.old);
1237
1238         /*
1239          * Atomically update last_tsc. This update races against concurrent
1240          * atomic updates, but the race will always cause supplementary full TSC
1241          * records, never the opposite (missing a full TSC record when it would
1242          * be needed).
1243          */
1244         save_last_tsc(config, buf, tsc);
1245
1246         /*
1247          * Push the reader if necessary
1248          */
1249         lib_ring_buffer_reserve_push_reader(buf, chan, offsets.old);
1250
1251         oldidx = subbuf_index(offsets.old, chan);
1252         lib_ring_buffer_clear_noref(config, &buf->backend, oldidx, handle);
1253
1254         /*
1255          * May need to populate header start on SWITCH_FLUSH.
1256          */
1257         if (offsets.switch_old_start) {
1258                 lib_ring_buffer_switch_old_start(buf, chan, &offsets, tsc, handle);
1259                 offsets.old += config->cb.subbuffer_header_size();
1260         }
1261
1262         /*
1263          * Switch old subbuffer.
1264          */
1265         lib_ring_buffer_switch_old_end(buf, chan, &offsets, tsc, handle);
1266 }
1267
1268 /*
1269  * Returns :
1270  * 0 if ok
1271  * -ENOSPC if event size is too large for packet.
1272  * -ENOBUFS if there is currently not enough space in buffer for the event.
1273  * -EIO if data cannot be written into the buffer for any other reason.
1274  */
1275 static
1276 int lib_ring_buffer_try_reserve_slow(struct lttng_ust_lib_ring_buffer *buf,
1277                                      struct channel *chan,
1278                                      struct switch_offsets *offsets,
1279                                      struct lttng_ust_lib_ring_buffer_ctx *ctx)
1280 {
1281         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
1282         struct lttng_ust_shm_handle *handle = ctx->handle;
1283         unsigned long reserve_commit_diff;
1284
1285         offsets->begin = v_read(config, &buf->offset);
1286         offsets->old = offsets->begin;
1287         offsets->switch_new_start = 0;
1288         offsets->switch_new_end = 0;
1289         offsets->switch_old_end = 0;
1290         offsets->pre_header_padding = 0;
1291
1292         ctx->tsc = config->cb.ring_buffer_clock_read(chan);
1293         if ((int64_t) ctx->tsc == -EIO)
1294                 return -EIO;
1295
1296         if (last_tsc_overflow(config, buf, ctx->tsc))
1297                 ctx->rflags |= RING_BUFFER_RFLAG_FULL_TSC;
1298
1299         if (caa_unlikely(subbuf_offset(offsets->begin, ctx->chan) == 0)) {
1300                 offsets->switch_new_start = 1;          /* For offsets->begin */
1301         } else {
1302                 offsets->size = config->cb.record_header_size(config, chan,
1303                                                 offsets->begin,
1304                                                 &offsets->pre_header_padding,
1305                                                 ctx);
1306                 offsets->size +=
1307                         lib_ring_buffer_align(offsets->begin + offsets->size,
1308                                               ctx->largest_align)
1309                         + ctx->data_size;
1310                 if (caa_unlikely(subbuf_offset(offsets->begin, chan) +
1311                              offsets->size > chan->backend.subbuf_size)) {
1312                         offsets->switch_old_end = 1;    /* For offsets->old */
1313                         offsets->switch_new_start = 1;  /* For offsets->begin */
1314                 }
1315         }
1316         if (caa_unlikely(offsets->switch_new_start)) {
1317                 unsigned long sb_index;
1318
1319                 /*
1320                  * We are typically not filling the previous buffer completely.
1321                  */
1322                 if (caa_likely(offsets->switch_old_end))
1323                         offsets->begin = subbuf_align(offsets->begin, chan);
1324                 offsets->begin = offsets->begin
1325                                  + config->cb.subbuffer_header_size();
1326                 /* Test new buffer integrity */
1327                 sb_index = subbuf_index(offsets->begin, chan);
1328                 reserve_commit_diff =
1329                   (buf_trunc(offsets->begin, chan)
1330                    >> chan->backend.num_subbuf_order)
1331                   - ((unsigned long) v_read(config,
1332                                             &shmp_index(handle, buf->commit_cold, sb_index)->cc_sb)
1333                      & chan->commit_count_mask);
1334                 if (caa_likely(reserve_commit_diff == 0)) {
1335                         /* Next subbuffer not being written to. */
1336                         if (caa_unlikely(config->mode != RING_BUFFER_OVERWRITE &&
1337                                 subbuf_trunc(offsets->begin, chan)
1338                                  - subbuf_trunc((unsigned long)
1339                                      uatomic_read(&buf->consumed), chan)
1340                                 >= chan->backend.buf_size)) {
1341                                 /*
1342                                  * We do not overwrite non consumed buffers
1343                                  * and we are full : record is lost.
1344                                  */
1345                                 v_inc(config, &buf->records_lost_full);
1346                                 return -ENOBUFS;
1347                         } else {
1348                                 /*
1349                                  * Next subbuffer not being written to, and we
1350                                  * are either in overwrite mode or the buffer is
1351                                  * not full. It's safe to write in this new
1352                                  * subbuffer.
1353                                  */
1354                         }
1355                 } else {
1356                         /*
1357                          * Next subbuffer reserve offset does not match the
1358                          * commit offset. Drop record in producer-consumer and
1359                          * overwrite mode. Caused by either a writer OOPS or too
1360                          * many nested writes over a reserve/commit pair.
1361                          */
1362                         v_inc(config, &buf->records_lost_wrap);
1363                         return -EIO;
1364                 }
1365                 offsets->size =
1366                         config->cb.record_header_size(config, chan,
1367                                                 offsets->begin,
1368                                                 &offsets->pre_header_padding,
1369                                                 ctx);
1370                 offsets->size +=
1371                         lib_ring_buffer_align(offsets->begin + offsets->size,
1372                                               ctx->largest_align)
1373                         + ctx->data_size;
1374                 if (caa_unlikely(subbuf_offset(offsets->begin, chan)
1375                              + offsets->size > chan->backend.subbuf_size)) {
1376                         /*
1377                          * Record too big for subbuffers, report error, don't
1378                          * complete the sub-buffer switch.
1379                          */
1380                         v_inc(config, &buf->records_lost_big);
1381                         return -ENOSPC;
1382                 } else {
1383                         /*
1384                          * We just made a successful buffer switch and the
1385                          * record fits in the new subbuffer. Let's write.
1386                          */
1387                 }
1388         } else {
1389                 /*
1390                  * Record fits in the current buffer and we are not on a switch
1391                  * boundary. It's safe to write.
1392                  */
1393         }
1394         offsets->end = offsets->begin + offsets->size;
1395
1396         if (caa_unlikely(subbuf_offset(offsets->end, chan) == 0)) {
1397                 /*
1398                  * The offset_end will fall at the very beginning of the next
1399                  * subbuffer.
1400                  */
1401                 offsets->switch_new_end = 1;    /* For offsets->begin */
1402         }
1403         return 0;
1404 }
1405
1406 /**
1407  * lib_ring_buffer_reserve_slow - Atomic slot reservation in a buffer.
1408  * @ctx: ring buffer context.
1409  *
1410  * Return : -ENOBUFS if not enough space, -ENOSPC if event size too large,
1411  * -EIO for other errors, else returns 0.
1412  * It will take care of sub-buffer switching.
1413  */
1414 int lib_ring_buffer_reserve_slow(struct lttng_ust_lib_ring_buffer_ctx *ctx)
1415 {
1416         struct channel *chan = ctx->chan;
1417         struct lttng_ust_shm_handle *handle = ctx->handle;
1418         const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
1419         struct lttng_ust_lib_ring_buffer *buf;
1420         struct switch_offsets offsets;
1421         int ret;
1422
1423         if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
1424                 buf = shmp(handle, chan->backend.buf[ctx->cpu].shmp);
1425         else
1426                 buf = shmp(handle, chan->backend.buf[0].shmp);
1427         ctx->buf = buf;
1428
1429         offsets.size = 0;
1430
1431         do {
1432                 ret = lib_ring_buffer_try_reserve_slow(buf, chan, &offsets,
1433                                                        ctx);
1434                 if (caa_unlikely(ret))
1435                         return ret;
1436         } while (caa_unlikely(v_cmpxchg(config, &buf->offset, offsets.old,
1437                                     offsets.end)
1438                           != offsets.old));
1439
1440         /*
1441          * Atomically update last_tsc. This update races against concurrent
1442          * atomic updates, but the race will always cause supplementary full TSC
1443          * records, never the opposite (missing a full TSC record when it would
1444          * be needed).
1445          */
1446         save_last_tsc(config, buf, ctx->tsc);
1447
1448         /*
1449          * Push the reader if necessary
1450          */
1451         lib_ring_buffer_reserve_push_reader(buf, chan, offsets.end - 1);
1452
1453         /*
1454          * Clear noref flag for this subbuffer.
1455          */
1456         lib_ring_buffer_clear_noref(config, &buf->backend,
1457                                     subbuf_index(offsets.end - 1, chan),
1458                                     handle);
1459
1460         /*
1461          * Switch old subbuffer if needed.
1462          */
1463         if (caa_unlikely(offsets.switch_old_end)) {
1464                 lib_ring_buffer_clear_noref(config, &buf->backend,
1465                                             subbuf_index(offsets.old - 1, chan),
1466                                             handle);
1467                 lib_ring_buffer_switch_old_end(buf, chan, &offsets, ctx->tsc, handle);
1468         }
1469
1470         /*
1471          * Populate new subbuffer.
1472          */
1473         if (caa_unlikely(offsets.switch_new_start))
1474                 lib_ring_buffer_switch_new_start(buf, chan, &offsets, ctx->tsc, handle);
1475
1476         if (caa_unlikely(offsets.switch_new_end))
1477                 lib_ring_buffer_switch_new_end(buf, chan, &offsets, ctx->tsc, handle);
1478
1479         ctx->slot_size = offsets.size;
1480         ctx->pre_offset = offsets.begin;
1481         ctx->buf_offset = offsets.begin + offsets.pre_header_padding;
1482         return 0;
1483 }