libringbuffer/ring_buffer_frontend.c

   1 /*
   2  * ring_buffer_frontend.c
   3  *
   4  * (C) Copyright 2005-2010 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
   5  *
   6  * Ring buffer wait-free buffer synchronization. Producer-consumer and flight
   7  * recorder (overwrite) modes. See thesis:
   8  *
   9  * Desnoyers, Mathieu (2009), "Low-Impact Operating System Tracing", Ph.D.
  10  * dissertation, Ecole Polytechnique de Montreal.
  11  * http://www.lttng.org/pub/thesis/desnoyers-dissertation-2009-12.pdf
  12  *
  13  * - Algorithm presentation in Chapter 5:
  14  *     "Lockless Multi-Core High-Throughput Buffering".
  15  * - Algorithm formal verification in Section 8.6:
  16  *     "Formal verification of LTTng"
  17  *
  18  * Author:
  19  *      Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
  20  *
  21  * Inspired from LTT and RelayFS:
  22  *  Karim Yaghmour <karim@opersys.com>
  23  *  Tom Zanussi <zanussi@us.ibm.com>
  24  *  Bob Wisniewski <bob@watson.ibm.com>
  25  * And from K42 :
  26  *  Bob Wisniewski <bob@watson.ibm.com>
  27  *
  28  * Buffer reader semantic :
  29  *
  30  * - get_subbuf_size
  31  * while buffer is not finalized and empty
  32  *   - get_subbuf
  33  *     - if return value != 0, continue
  34  *   - splice one subbuffer worth of data to a pipe
  35  *   - splice the data from pipe to disk/network
  36  *   - put_subbuf
  37  *
  38  * Dual LGPL v2.1/GPL v2 license.
  39  */
  40
  41 #include <sys/types.h>
  42 #include <sys/mman.h>
  43 #include <sys/stat.h>
  44 #include <fcntl.h>
  45 #include <urcu/compiler.h>
  46 #include <urcu/ref.h>
  47
  48 #include "smp.h"
  49 #include <ust/ringbuffer-config.h>
  50 #include "backend.h"
  51 #include "frontend.h"
  52 #include "shm.h"
  53
  54 #ifndef max
  55 #define max(a, b)       ((a) > (b) ? (a) : (b))
  56 #endif
  57
  58 /*
  59  * Use POSIX SHM: shm_open(3) and shm_unlink(3).
  60  * close(2) to close the fd returned by shm_open.
  61  * shm_unlink releases the shared memory object name.
  62  * ftruncate(2) sets the size of the memory object.
  63  * mmap/munmap maps the shared memory obj to a virtual address in the
  64  * calling process (should be done both in libust and consumer).
  65  * See shm_overview(7) for details.
  66  * Pass file descriptor returned by shm_open(3) to ltt-sessiond through
  67  * a UNIX socket.
  68  *
  69  * Since we don't need to access the object using its name, we can
  70  * immediately shm_unlink(3) it, and only keep the handle with its file
  71  * descriptor.
  72  */
  73
  74 /*
  75  * Internal structure representing offsets to use at a sub-buffer switch.
  76  */
  77 struct switch_offsets {
  78         unsigned long begin, end, old;
  79         size_t pre_header_padding, size;
  80         unsigned int switch_new_start:1, switch_new_end:1, switch_old_start:1,
  81                      switch_old_end:1;
  82 };
  83
  84 __thread unsigned int lib_ring_buffer_nesting;
  85
  86 static
  87 void lib_ring_buffer_print_errors(struct channel *chan,
  88                                   struct lib_ring_buffer *buf, int cpu,
  89                                   struct shm_handle *handle);
  90
  91 /*
  92  * Must be called under cpu hotplug protection.
  93  */
  94 void lib_ring_buffer_free(struct lib_ring_buffer *buf,
  95                           struct shm_handle *handle)
  96 {
  97         struct channel *chan = shmp(handle, buf->backend.chan);
  98
  99         lib_ring_buffer_print_errors(chan, buf, buf->backend.cpu, handle);
 100         /* buf->commit_hot will be freed by shm teardown */
 101         /* buf->commit_cold will be freed by shm teardown */
 102
 103         lib_ring_buffer_backend_free(&buf->backend);
 104 }
 105
 106 /**
 107  * lib_ring_buffer_reset - Reset ring buffer to initial values.
 108  * @buf: Ring buffer.
 109  *
 110  * Effectively empty the ring buffer. Should be called when the buffer is not
 111  * used for writing. The ring buffer can be opened for reading, but the reader
 112  * should not be using the iterator concurrently with reset. The previous
 113  * current iterator record is reset.
 114  */
 115 void lib_ring_buffer_reset(struct lib_ring_buffer *buf,
 116                            struct shm_handle *handle)
 117 {
 118         struct channel *chan = shmp(handle, buf->backend.chan);
 119         const struct lib_ring_buffer_config *config = chan->backend.config;
 120         unsigned int i;
 121
 122         /*
 123          * Reset iterator first. It will put the subbuffer if it currently holds
 124          * it.
 125          */
 126         v_set(config, &buf->offset, 0);
 127         for (i = 0; i < chan->backend.num_subbuf; i++) {
 128                 v_set(config, &shmp_index(handle, buf->commit_hot, i)->cc, 0);
 129                 v_set(config, &shmp_index(handle, buf->commit_hot, i)->seq, 0);
 130                 v_set(config, &shmp_index(handle, buf->commit_cold, i)->cc_sb, 0);
 131         }
 132         uatomic_set(&buf->consumed, 0);
 133         uatomic_set(&buf->record_disabled, 0);
 134         v_set(config, &buf->last_tsc, 0);
 135         lib_ring_buffer_backend_reset(&buf->backend, handle);
 136         /* Don't reset number of active readers */
 137         v_set(config, &buf->records_lost_full, 0);
 138         v_set(config, &buf->records_lost_wrap, 0);
 139         v_set(config, &buf->records_lost_big, 0);
 140         v_set(config, &buf->records_count, 0);
 141         v_set(config, &buf->records_overrun, 0);
 142         buf->finalized = 0;
 143 }
 144
 145 /**
 146  * channel_reset - Reset channel to initial values.
 147  * @chan: Channel.
 148  *
 149  * Effectively empty the channel. Should be called when the channel is not used
 150  * for writing. The channel can be opened for reading, but the reader should not
 151  * be using the iterator concurrently with reset. The previous current iterator
 152  * record is reset.
 153  */
 154 void channel_reset(struct channel *chan)
 155 {
 156         /*
 157          * Reset iterators first. Will put the subbuffer if held for reading.
 158          */
 159         uatomic_set(&chan->record_disabled, 0);
 160         /* Don't reset commit_count_mask, still valid */
 161         channel_backend_reset(&chan->backend);
 162         /* Don't reset switch/read timer interval */
 163         /* Don't reset notifiers and notifier enable bits */
 164         /* Don't reset reader reference count */
 165 }
 166
 167 /*
 168  * Must be called under cpu hotplug protection.
 169  */
 170 int lib_ring_buffer_create(struct lib_ring_buffer *buf,
 171                            struct channel_backend *chanb, int cpu,
 172                            struct shm_handle *handle,
 173                            struct shm_object *shmobj)
 174 {
 175         const struct lib_ring_buffer_config *config = chanb->config;
 176         struct channel *chan = caa_container_of(chanb, struct channel, backend);
 177         void *priv = chanb->priv;
 178         unsigned int num_subbuf;
 179         size_t subbuf_header_size;
 180         u64 tsc;
 181         int ret;
 182
 183         /* Test for cpu hotplug */
 184         if (buf->backend.allocated)
 185                 return 0;
 186
 187         ret = lib_ring_buffer_backend_create(&buf->backend, &chan->backend,
 188                         cpu, handle, shmobj);
 189         if (ret)
 190                 return ret;
 191
 192         align_shm(shmobj, __alignof__(struct commit_counters_hot));
 193         set_shmp(buf->commit_hot,
 194                  zalloc_shm(shmobj,
 195                         sizeof(struct commit_counters_hot) * chan->backend.num_subbuf));
 196         if (!shmp(handle, buf->commit_hot)) {
 197                 ret = -ENOMEM;
 198                 goto free_chanbuf;
 199         }
 200
 201         align_shm(shmobj, __alignof__(struct commit_counters_cold));
 202         set_shmp(buf->commit_cold,
 203                  zalloc_shm(shmobj,
 204                         sizeof(struct commit_counters_cold) * chan->backend.num_subbuf));
 205         if (!shmp(handle, buf->commit_cold)) {
 206                 ret = -ENOMEM;
 207                 goto free_commit;
 208         }
 209
 210         num_subbuf = chan->backend.num_subbuf;
 211         //init_waitqueue_head(&buf->read_wait);
 212
 213         /*
 214          * Write the subbuffer header for first subbuffer so we know the total
 215          * duration of data gathering.
 216          */
 217         subbuf_header_size = config->cb.subbuffer_header_size();
 218         v_set(config, &buf->offset, subbuf_header_size);
 219         subbuffer_id_clear_noref(config, &shmp_index(handle, buf->backend.buf_wsb, 0)->id);
 220         tsc = config->cb.ring_buffer_clock_read(shmp(handle, buf->backend.chan));
 221         config->cb.buffer_begin(buf, tsc, 0, handle);
 222         v_add(config, subbuf_header_size, &shmp_index(handle, buf->commit_hot, 0)->cc);
 223
 224         if (config->cb.buffer_create) {
 225                 ret = config->cb.buffer_create(buf, priv, cpu, chanb->name, handle);
 226                 if (ret)
 227                         goto free_init;
 228         }
 229         buf->backend.allocated = 1;
 230         return 0;
 231
 232         /* Error handling */
 233 free_init:
 234         /* commit_cold will be freed by shm teardown */
 235 free_commit:
 236         /* commit_hot will be freed by shm teardown */
 237 free_chanbuf:
 238         lib_ring_buffer_backend_free(&buf->backend);
 239         return ret;
 240 }
 241
 242 #if 0
 243 static void switch_buffer_timer(unsigned long data)
 244 {
 245         struct lib_ring_buffer *buf = (struct lib_ring_buffer *)data;
 246         struct channel *chan = shmp(handle, buf->backend.chan);
 247         const struct lib_ring_buffer_config *config = chan->backend.config;
 248
 249         /*
 250          * Only flush buffers periodically if readers are active.
 251          */
 252         if (uatomic_read(&buf->active_readers))
 253                 lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE, handle);
 254
 255         //TODO timers
 256         //if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
 257         //      mod_timer_pinned(&buf->switch_timer,
 258         //                       jiffies + chan->switch_timer_interval);
 259         //else
 260         //      mod_timer(&buf->switch_timer,
 261         //                jiffies + chan->switch_timer_interval);
 262 }
 263 #endif //0
 264
 265 static void lib_ring_buffer_start_switch_timer(struct lib_ring_buffer *buf,
 266                            struct shm_handle *handle)
 267 {
 268         struct channel *chan = shmp(handle, buf->backend.chan);
 269         const struct lib_ring_buffer_config *config = chan->backend.config;
 270
 271         if (!chan->switch_timer_interval || buf->switch_timer_enabled)
 272                 return;
 273         //TODO
 274         //init_timer(&buf->switch_timer);
 275         //buf->switch_timer.function = switch_buffer_timer;
 276         //buf->switch_timer.expires = jiffies + chan->switch_timer_interval;
 277         //buf->switch_timer.data = (unsigned long)buf;
 278         //if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
 279         //      add_timer_on(&buf->switch_timer, buf->backend.cpu);
 280         //else
 281         //      add_timer(&buf->switch_timer);
 282         buf->switch_timer_enabled = 1;
 283 }
 284
 285 static void lib_ring_buffer_stop_switch_timer(struct lib_ring_buffer *buf,
 286                            struct shm_handle *handle)
 287 {
 288         struct channel *chan = shmp(handle, buf->backend.chan);
 289
 290         if (!chan->switch_timer_interval || !buf->switch_timer_enabled)
 291                 return;
 292
 293         //TODO
 294         //del_timer_sync(&buf->switch_timer);
 295         buf->switch_timer_enabled = 0;
 296 }
 297
 298 #if 0
 299 /*
 300  * Polling timer to check the channels for data.
 301  */
 302 static void read_buffer_timer(unsigned long data)
 303 {
 304         struct lib_ring_buffer *buf = (struct lib_ring_buffer *)data;
 305         struct channel *chan = shmp(handle, buf->backend.chan);
 306         const struct lib_ring_buffer_config *config = chan->backend.config;
 307
 308         CHAN_WARN_ON(chan, !buf->backend.allocated);
 309
 310         if (uatomic_read(&buf->active_readers)
 311             && lib_ring_buffer_poll_deliver(config, buf, chan)) {
 312                 //TODO
 313                 //wake_up_interruptible(&buf->read_wait);
 314                 //wake_up_interruptible(&chan->read_wait);
 315         }
 316
 317         //TODO
 318         //if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
 319         //      mod_timer_pinned(&buf->read_timer,
 320         //                       jiffies + chan->read_timer_interval);
 321         //else
 322         //      mod_timer(&buf->read_timer,
 323         //                jiffies + chan->read_timer_interval);
 324 }
 325 #endif //0
 326
 327 static void lib_ring_buffer_start_read_timer(struct lib_ring_buffer *buf,
 328                            struct shm_handle *handle)
 329 {
 330         struct channel *chan = shmp(handle, buf->backend.chan);
 331         const struct lib_ring_buffer_config *config = chan->backend.config;
 332
 333         if (config->wakeup != RING_BUFFER_WAKEUP_BY_TIMER
 334             || !chan->read_timer_interval
 335             || buf->read_timer_enabled)
 336                 return;
 337
 338         //TODO
 339         //init_timer(&buf->read_timer);
 340         //buf->read_timer.function = read_buffer_timer;
 341         //buf->read_timer.expires = jiffies + chan->read_timer_interval;
 342         //buf->read_timer.data = (unsigned long)buf;
 343
 344         //if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
 345         //      add_timer_on(&buf->read_timer, buf->backend.cpu);
 346         //else
 347         //      add_timer(&buf->read_timer);
 348         buf->read_timer_enabled = 1;
 349 }
 350
 351 static void lib_ring_buffer_stop_read_timer(struct lib_ring_buffer *buf,
 352                            struct shm_handle *handle)
 353 {
 354         struct channel *chan = shmp(handle, buf->backend.chan);
 355         const struct lib_ring_buffer_config *config = chan->backend.config;
 356
 357         if (config->wakeup != RING_BUFFER_WAKEUP_BY_TIMER
 358             || !chan->read_timer_interval
 359             || !buf->read_timer_enabled)
 360                 return;
 361
 362         //TODO
 363         //del_timer_sync(&buf->read_timer);
 364         /*
 365          * do one more check to catch data that has been written in the last
 366          * timer period.
 367          */
 368         if (lib_ring_buffer_poll_deliver(config, buf, chan, handle)) {
 369                 //TODO
 370                 //wake_up_interruptible(&buf->read_wait);
 371                 //wake_up_interruptible(&chan->read_wait);
 372         }
 373         buf->read_timer_enabled = 0;
 374 }
 375
 376 static void channel_unregister_notifiers(struct channel *chan,
 377                            struct shm_handle *handle)
 378 {
 379         const struct lib_ring_buffer_config *config = chan->backend.config;
 380         int cpu;
 381
 382         if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
 383                 for_each_possible_cpu(cpu) {
 384                         struct lib_ring_buffer *buf = shmp(handle, chan->backend.buf[cpu].shmp);
 385
 386                         lib_ring_buffer_stop_switch_timer(buf, handle);
 387                         lib_ring_buffer_stop_read_timer(buf, handle);
 388                 }
 389         } else {
 390                 struct lib_ring_buffer *buf = shmp(handle, chan->backend.buf[0].shmp);
 391
 392                 lib_ring_buffer_stop_switch_timer(buf, handle);
 393                 lib_ring_buffer_stop_read_timer(buf, handle);
 394         }
 395         //channel_backend_unregister_notifiers(&chan->backend);
 396 }
 397
 398 static void channel_free(struct channel *chan, struct shm_handle *handle)
 399 {
 400         int ret;
 401
 402         channel_backend_free(&chan->backend, handle);
 403         /* chan is freed by shm teardown */
 404         shm_object_table_destroy(handle->table);
 405         free(handle);
 406 }
 407
 408 /**
 409  * channel_create - Create channel.
 410  * @config: ring buffer instance configuration
 411  * @name: name of the channel
 412  * @priv: ring buffer client private data
 413  * @buf_addr: pointer the the beginning of the preallocated buffer contiguous
 414  *            address mapping. It is used only by RING_BUFFER_STATIC
 415  *            configuration. It can be set to NULL for other backends.
 416  * @subbuf_size: subbuffer size
 417  * @num_subbuf: number of subbuffers
 418  * @switch_timer_interval: Time interval (in us) to fill sub-buffers with
 419  *                         padding to let readers get those sub-buffers.
 420  *                         Used for live streaming.
 421  * @read_timer_interval: Time interval (in us) to wake up pending readers.
 422  *
 423  * Holds cpu hotplug.
 424  * Returns NULL on failure.
 425  */
 426 struct shm_handle *channel_create(const struct lib_ring_buffer_config *config,
 427                    const char *name, void *priv, void *buf_addr,
 428                    size_t subbuf_size,
 429                    size_t num_subbuf, unsigned int switch_timer_interval,
 430                    unsigned int read_timer_interval)
 431 {
 432         int ret, cpu;
 433         size_t shmsize;
 434         struct channel *chan;
 435         struct shm_handle *handle;
 436         struct shm_object *shmobj;
 437
 438         if (lib_ring_buffer_check_config(config, switch_timer_interval,
 439                                          read_timer_interval))
 440                 return NULL;
 441
 442         handle = zmalloc(sizeof(struct shm_handle));
 443         if (!handle)
 444                 return NULL;
 445
 446         /* Allocate table for channel + per-cpu buffers */
 447         handle->table = shm_object_table_create(1 + num_possible_cpus());
 448         if (!handle->table)
 449                 goto error_table_alloc;
 450
 451         /* Calculate the shm allocation layout */
 452         shmsize = sizeof(struct channel);
 453         if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
 454                 shmsize += sizeof(struct lib_ring_buffer_shmp) * num_possible_cpus();
 455         else
 456                 shmsize += sizeof(struct lib_ring_buffer_shmp);
 457
 458         shmobj = shm_object_table_append(handle->table, shmsize);
 459         if (!shmobj)
 460                 goto error_append;
 461         set_shmp(handle->chan, zalloc_shm(shmobj, sizeof(struct channel)));
 462         chan = shmp(handle, handle->chan);
 463         if (!chan)
 464                 goto error_append;
 465
 466         ret = channel_backend_init(&chan->backend, name, config, priv,
 467                                    subbuf_size, num_subbuf, handle);
 468         if (ret)
 469                 goto error_backend_init;
 470
 471         chan->commit_count_mask = (~0UL >> chan->backend.num_subbuf_order);
 472         //TODO
 473         //chan->switch_timer_interval = usecs_to_jiffies(switch_timer_interval);
 474         //chan->read_timer_interval = usecs_to_jiffies(read_timer_interval);
 475         //TODO
 476         //init_waitqueue_head(&chan->read_wait);
 477         //init_waitqueue_head(&chan->hp_wait);
 478
 479         if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
 480                 /*
 481                  * In case of non-hotplug cpu, if the ring-buffer is allocated
 482                  * in early initcall, it will not be notified of secondary cpus.
 483                  * In that off case, we need to allocate for all possible cpus.
 484                  */
 485                 for_each_possible_cpu(cpu) {
 486                         struct lib_ring_buffer *buf = shmp(handle, chan->backend.buf[cpu].shmp);
 487                         lib_ring_buffer_start_switch_timer(buf, handle);
 488                         lib_ring_buffer_start_read_timer(buf, handle);
 489                 }
 490         } else {
 491                 struct lib_ring_buffer *buf = shmp(handle, chan->backend.buf[0].shmp);
 492
 493                 lib_ring_buffer_start_switch_timer(buf, handle);
 494                 lib_ring_buffer_start_read_timer(buf, handle);
 495         }
 496
 497         return handle;
 498
 499 error_backend_init:
 500 error_append:
 501         shm_object_table_destroy(handle->table);
 502 error_table_alloc:
 503         free(handle);
 504         return NULL;
 505 }
 506
 507 static
 508 void channel_release(struct channel *chan, struct shm_handle *handle)
 509 {
 510         channel_free(chan, handle);
 511 }
 512
 513 /**
 514  * channel_destroy - Finalize, wait for q.s. and destroy channel.
 515  * @chan: channel to destroy
 516  *
 517  * Holds cpu hotplug.
 518  * Call "destroy" callback, finalize channels, decrement the channel
 519  * reference count. Note that when readers have completed data
 520  * consumption of finalized channels, get_subbuf() will return -ENODATA.
 521  * They should release their handle at that point.  Returns the private
 522  * data pointer.
 523  */
 524 void *channel_destroy(struct channel *chan, struct shm_handle *handle)
 525 {
 526         const struct lib_ring_buffer_config *config = chan->backend.config;
 527         void *priv;
 528         int cpu;
 529
 530         channel_unregister_notifiers(chan, handle);
 531
 532         if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
 533                 for_each_channel_cpu(cpu, chan) {
 534                         struct lib_ring_buffer *buf = shmp(handle, chan->backend.buf[cpu].shmp);
 535
 536                         if (config->cb.buffer_finalize)
 537                                 config->cb.buffer_finalize(buf,
 538                                                            chan->backend.priv,
 539                                                            cpu, handle);
 540                         if (buf->backend.allocated)
 541                                 lib_ring_buffer_switch_slow(buf, SWITCH_FLUSH,
 542                                                 handle);
 543                         /*
 544                          * Perform flush before writing to finalized.
 545                          */
 546                         cmm_smp_wmb();
 547                         CMM_ACCESS_ONCE(buf->finalized) = 1;
 548                         //wake_up_interruptible(&buf->read_wait);
 549                 }
 550         } else {
 551                 struct lib_ring_buffer *buf = shmp(handle, chan->backend.buf[0].shmp);
 552
 553                 if (config->cb.buffer_finalize)
 554                         config->cb.buffer_finalize(buf, chan->backend.priv, -1, handle);
 555                 if (buf->backend.allocated)
 556                         lib_ring_buffer_switch_slow(buf, SWITCH_FLUSH,
 557                                                 handle);
 558                 /*
 559                  * Perform flush before writing to finalized.
 560                  */
 561                 cmm_smp_wmb();
 562                 CMM_ACCESS_ONCE(buf->finalized) = 1;
 563                 //wake_up_interruptible(&buf->read_wait);
 564         }
 565         CMM_ACCESS_ONCE(chan->finalized) = 1;
 566         //wake_up_interruptible(&chan->hp_wait);
 567         //wake_up_interruptible(&chan->read_wait);
 568         /*
 569          * sessiond/consumer are keeping a reference on the shm file
 570          * descriptor directly. No need to refcount.
 571          */
 572         priv = chan->backend.priv;
 573         channel_release(chan, handle);
 574         return priv;
 575 }
 576
 577 struct lib_ring_buffer *channel_get_ring_buffer(
 578                                         const struct lib_ring_buffer_config *config,
 579                                         struct channel *chan, int cpu,
 580                                         struct shm_handle *handle,
 581                                         int *shm_fd, int *wait_fd,
 582                                         uint64_t *memory_map_size)
 583 {
 584         struct shm_ref *ref;
 585
 586         if (config->alloc == RING_BUFFER_ALLOC_GLOBAL) {
 587                 ref = &chan->backend.buf[0].shmp._ref;
 588                 shm_get_object_data(handle, ref, shm_fd, wait_fd,
 589                         memory_map_size);
 590                 return shmp(handle, chan->backend.buf[0].shmp);
 591         } else {
 592                 ref = &chan->backend.buf[cpu].shmp._ref;
 593                 shm_get_object_data(handle, ref, shm_fd, wait_fd,
 594                         memory_map_size);
 595                 return shmp(handle, chan->backend.buf[cpu].shmp);
 596         }
 597 }
 598
 599 int lib_ring_buffer_open_read(struct lib_ring_buffer *buf,
 600                               struct shm_handle *handle)
 601 {
 602         struct channel *chan = shmp(handle, buf->backend.chan);
 603
 604         if (uatomic_cmpxchg(&buf->active_readers, 0, 1) != 0)
 605                 return -EBUSY;
 606         cmm_smp_mb();
 607         return 0;
 608 }
 609
 610 void lib_ring_buffer_release_read(struct lib_ring_buffer *buf,
 611                                   struct shm_handle *handle)
 612 {
 613         struct channel *chan = shmp(handle, buf->backend.chan);
 614
 615         CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1);
 616         cmm_smp_mb();
 617         uatomic_dec(&buf->active_readers);
 618 }
 619
 620 /**
 621  * lib_ring_buffer_snapshot - save subbuffer position snapshot (for read)
 622  * @buf: ring buffer
 623  * @consumed: consumed count indicating the position where to read
 624  * @produced: produced count, indicates position when to stop reading
 625  *
 626  * Returns -ENODATA if buffer is finalized, -EAGAIN if there is currently no
 627  * data to read at consumed position, or 0 if the get operation succeeds.
 628  */
 629
 630 int lib_ring_buffer_snapshot(struct lib_ring_buffer *buf,
 631                              unsigned long *consumed, unsigned long *produced,
 632                              struct shm_handle *handle)
 633 {
 634         struct channel *chan = shmp(handle, buf->backend.chan);
 635         const struct lib_ring_buffer_config *config = chan->backend.config;
 636         unsigned long consumed_cur, write_offset;
 637         int finalized;
 638
 639         finalized = CMM_ACCESS_ONCE(buf->finalized);
 640         /*
 641          * Read finalized before counters.
 642          */
 643         cmm_smp_rmb();
 644         consumed_cur = uatomic_read(&buf->consumed);
 645         /*
 646          * No need to issue a memory barrier between consumed count read and
 647          * write offset read, because consumed count can only change
 648          * concurrently in overwrite mode, and we keep a sequence counter
 649          * identifier derived from the write offset to check we are getting
 650          * the same sub-buffer we are expecting (the sub-buffers are atomically
 651          * "tagged" upon writes, tags are checked upon read).
 652          */
 653         write_offset = v_read(config, &buf->offset);
 654
 655         /*
 656          * Check that we are not about to read the same subbuffer in
 657          * which the writer head is.
 658          */
 659         if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed_cur, chan)
 660             == 0)
 661                 goto nodata;
 662
 663         *consumed = consumed_cur;
 664         *produced = subbuf_trunc(write_offset, chan);
 665
 666         return 0;
 667
 668 nodata:
 669         /*
 670          * The memory barriers __wait_event()/wake_up_interruptible() take care
 671          * of "raw_spin_is_locked" memory ordering.
 672          */
 673         if (finalized)
 674                 return -ENODATA;
 675         else
 676                 return -EAGAIN;
 677 }
 678
 679 /**
 680  * lib_ring_buffer_put_snapshot - move consumed counter forward
 681  * @buf: ring buffer
 682  * @consumed_new: new consumed count value
 683  */
 684 void lib_ring_buffer_move_consumer(struct lib_ring_buffer *buf,
 685                                    unsigned long consumed_new,
 686                                    struct shm_handle *handle)
 687 {
 688         struct lib_ring_buffer_backend *bufb = &buf->backend;
 689         struct channel *chan = shmp(handle, bufb->chan);
 690         unsigned long consumed;
 691
 692         CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1);
 693
 694         /*
 695          * Only push the consumed value forward.
 696          * If the consumed cmpxchg fails, this is because we have been pushed by
 697          * the writer in flight recorder mode.
 698          */
 699         consumed = uatomic_read(&buf->consumed);
 700         while ((long) consumed - (long) consumed_new < 0)
 701                 consumed = uatomic_cmpxchg(&buf->consumed, consumed,
 702                                            consumed_new);
 703 }
 704
 705 /**
 706  * lib_ring_buffer_get_subbuf - get exclusive access to subbuffer for reading
 707  * @buf: ring buffer
 708  * @consumed: consumed count indicating the position where to read
 709  *
 710  * Returns -ENODATA if buffer is finalized, -EAGAIN if there is currently no
 711  * data to read at consumed position, or 0 if the get operation succeeds.
 712  */
 713 int lib_ring_buffer_get_subbuf(struct lib_ring_buffer *buf,
 714                                unsigned long consumed,
 715                                struct shm_handle *handle)
 716 {
 717         struct channel *chan = shmp(handle, buf->backend.chan);
 718         const struct lib_ring_buffer_config *config = chan->backend.config;
 719         unsigned long consumed_cur, consumed_idx, commit_count, write_offset;
 720         int ret;
 721         int finalized;
 722
 723 retry:
 724         finalized = CMM_ACCESS_ONCE(buf->finalized);
 725         /*
 726          * Read finalized before counters.
 727          */
 728         cmm_smp_rmb();
 729         consumed_cur = uatomic_read(&buf->consumed);
 730         consumed_idx = subbuf_index(consumed, chan);
 731         commit_count = v_read(config, &shmp_index(handle, buf->commit_cold, consumed_idx)->cc_sb);
 732         /*
 733          * Make sure we read the commit count before reading the buffer
 734          * data and the write offset. Correct consumed offset ordering
 735          * wrt commit count is insured by the use of cmpxchg to update
 736          * the consumed offset.
 737          */
 738         /*
 739          * Local rmb to match the remote wmb to read the commit count
 740          * before the buffer data and the write offset.
 741          */
 742         cmm_smp_rmb();
 743
 744         write_offset = v_read(config, &buf->offset);
 745
 746         /*
 747          * Check that the buffer we are getting is after or at consumed_cur
 748          * position.
 749          */
 750         if ((long) subbuf_trunc(consumed, chan)
 751             - (long) subbuf_trunc(consumed_cur, chan) < 0)
 752                 goto nodata;
 753
 754         /*
 755          * Check that the subbuffer we are trying to consume has been
 756          * already fully committed.
 757          */
 758         if (((commit_count - chan->backend.subbuf_size)
 759              & chan->commit_count_mask)
 760             - (buf_trunc(consumed_cur, chan)
 761                >> chan->backend.num_subbuf_order)
 762             != 0)
 763                 goto nodata;
 764
 765         /*
 766          * Check that we are not about to read the same subbuffer in
 767          * which the writer head is.
 768          */
 769         if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed_cur, chan)
 770             == 0)
 771                 goto nodata;
 772
 773         /*
 774          * Failure to get the subbuffer causes a busy-loop retry without going
 775          * to a wait queue. These are caused by short-lived race windows where
 776          * the writer is getting access to a subbuffer we were trying to get
 777          * access to. Also checks that the "consumed" buffer count we are
 778          * looking for matches the one contained in the subbuffer id.
 779          */
 780         ret = update_read_sb_index(config, &buf->backend, &chan->backend,
 781                                    consumed_idx, buf_trunc_val(consumed, chan),
 782                                    handle);
 783         if (ret)
 784                 goto retry;
 785         subbuffer_id_clear_noref(config, &buf->backend.buf_rsb.id);
 786
 787         buf->get_subbuf_consumed = consumed;
 788         buf->get_subbuf = 1;
 789
 790         return 0;
 791
 792 nodata:
 793         /*
 794          * The memory barriers __wait_event()/wake_up_interruptible() take care
 795          * of "raw_spin_is_locked" memory ordering.
 796          */
 797         if (finalized)
 798                 return -ENODATA;
 799         else
 800                 return -EAGAIN;
 801 }
 802
 803 /**
 804  * lib_ring_buffer_put_subbuf - release exclusive subbuffer access
 805  * @buf: ring buffer
 806  */
 807 void lib_ring_buffer_put_subbuf(struct lib_ring_buffer *buf,
 808                                 struct shm_handle *handle)
 809 {
 810         struct lib_ring_buffer_backend *bufb = &buf->backend;
 811         struct channel *chan = shmp(handle, bufb->chan);
 812         const struct lib_ring_buffer_config *config = chan->backend.config;
 813         unsigned long read_sb_bindex, consumed_idx, consumed;
 814
 815         CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1);
 816
 817         if (!buf->get_subbuf) {
 818                 /*
 819                  * Reader puts a subbuffer it did not get.
 820                  */
 821                 CHAN_WARN_ON(chan, 1);
 822                 return;
 823         }
 824         consumed = buf->get_subbuf_consumed;
 825         buf->get_subbuf = 0;
 826
 827         /*
 828          * Clear the records_unread counter. (overruns counter)
 829          * Can still be non-zero if a file reader simply grabbed the data
 830          * without using iterators.
 831          * Can be below zero if an iterator is used on a snapshot more than
 832          * once.
 833          */
 834         read_sb_bindex = subbuffer_id_get_index(config, bufb->buf_rsb.id);
 835         v_add(config, v_read(config,
 836                              &shmp(handle, shmp_index(handle, bufb->array, read_sb_bindex)->shmp)->records_unread),
 837               &bufb->records_read);
 838         v_set(config, &shmp(handle, shmp_index(handle, bufb->array, read_sb_bindex)->shmp)->records_unread, 0);
 839         CHAN_WARN_ON(chan, config->mode == RING_BUFFER_OVERWRITE
 840                      && subbuffer_id_is_noref(config, bufb->buf_rsb.id));
 841         subbuffer_id_set_noref(config, &bufb->buf_rsb.id);
 842
 843         /*
 844          * Exchange the reader subbuffer with the one we put in its place in the
 845          * writer subbuffer table. Expect the original consumed count. If
 846          * update_read_sb_index fails, this is because the writer updated the
 847          * subbuffer concurrently. We should therefore keep the subbuffer we
 848          * currently have: it has become invalid to try reading this sub-buffer
 849          * consumed count value anyway.
 850          */
 851         consumed_idx = subbuf_index(consumed, chan);
 852         update_read_sb_index(config, &buf->backend, &chan->backend,
 853                              consumed_idx, buf_trunc_val(consumed, chan),
 854                              handle);
 855         /*
 856          * update_read_sb_index return value ignored. Don't exchange sub-buffer
 857          * if the writer concurrently updated it.
 858          */
 859 }
 860
 861 /*
 862  * cons_offset is an iterator on all subbuffer offsets between the reader
 863  * position and the writer position. (inclusive)
 864  */
 865 static
 866 void lib_ring_buffer_print_subbuffer_errors(struct lib_ring_buffer *buf,
 867                                             struct channel *chan,
 868                                             unsigned long cons_offset,
 869                                             int cpu,
 870                                             struct shm_handle *handle)
 871 {
 872         const struct lib_ring_buffer_config *config = chan->backend.config;
 873         unsigned long cons_idx, commit_count, commit_count_sb;
 874
 875         cons_idx = subbuf_index(cons_offset, chan);
 876         commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, cons_idx)->cc);
 877         commit_count_sb = v_read(config, &shmp_index(handle, buf->commit_cold, cons_idx)->cc_sb);
 878
 879         if (subbuf_offset(commit_count, chan) != 0)
 880                 ERRMSG("ring buffer %s, cpu %d: "
 881                        "commit count in subbuffer %lu,\n"
 882                        "expecting multiples of %lu bytes\n"
 883                        "  [ %lu bytes committed, %lu bytes reader-visible ]\n",
 884                        chan->backend.name, cpu, cons_idx,
 885                        chan->backend.subbuf_size,
 886                        commit_count, commit_count_sb);
 887
 888         ERRMSG("ring buffer: %s, cpu %d: %lu bytes committed\n",
 889                chan->backend.name, cpu, commit_count);
 890 }
 891
 892 static
 893 void lib_ring_buffer_print_buffer_errors(struct lib_ring_buffer *buf,
 894                                          struct channel *chan,
 895                                          void *priv, int cpu,
 896                                          struct shm_handle *handle)
 897 {
 898         const struct lib_ring_buffer_config *config = chan->backend.config;
 899         unsigned long write_offset, cons_offset;
 900
 901         /*
 902          * Can be called in the error path of allocation when
 903          * trans_channel_data is not yet set.
 904          */
 905         if (!chan)
 906                 return;
 907         /*
 908          * No need to order commit_count, write_offset and cons_offset reads
 909          * because we execute at teardown when no more writer nor reader
 910          * references are left.
 911          */
 912         write_offset = v_read(config, &buf->offset);
 913         cons_offset = uatomic_read(&buf->consumed);
 914         if (write_offset != cons_offset)
 915                 ERRMSG("ring buffer %s, cpu %d: "
 916                        "non-consumed data\n"
 917                        "  [ %lu bytes written, %lu bytes read ]\n",
 918                        chan->backend.name, cpu, write_offset, cons_offset);
 919
 920         for (cons_offset = uatomic_read(&buf->consumed);
 921              (long) (subbuf_trunc((unsigned long) v_read(config, &buf->offset),
 922                                   chan)
 923                      - cons_offset) > 0;
 924              cons_offset = subbuf_align(cons_offset, chan))
 925                 lib_ring_buffer_print_subbuffer_errors(buf, chan, cons_offset,
 926                                                        cpu, handle);
 927 }
 928
 929 static
 930 void lib_ring_buffer_print_errors(struct channel *chan,
 931                                   struct lib_ring_buffer *buf, int cpu,
 932                                   struct shm_handle *handle)
 933 {
 934         const struct lib_ring_buffer_config *config = chan->backend.config;
 935         void *priv = chan->backend.priv;
 936
 937         ERRMSG("ring buffer %s, cpu %d: %lu records written, "
 938                           "%lu records overrun\n",
 939                           chan->backend.name, cpu,
 940                           v_read(config, &buf->records_count),
 941                           v_read(config, &buf->records_overrun));
 942
 943         if (v_read(config, &buf->records_lost_full)
 944             || v_read(config, &buf->records_lost_wrap)
 945             || v_read(config, &buf->records_lost_big))
 946                 ERRMSG("ring buffer %s, cpu %d: records were lost. Caused by:\n"
 947                        "  [ %lu buffer full, %lu nest buffer wrap-around, "
 948                        "%lu event too big ]\n",
 949                        chan->backend.name, cpu,
 950                        v_read(config, &buf->records_lost_full),
 951                        v_read(config, &buf->records_lost_wrap),
 952                        v_read(config, &buf->records_lost_big));
 953
 954         lib_ring_buffer_print_buffer_errors(buf, chan, priv, cpu, handle);
 955 }
 956
 957 /*
 958  * lib_ring_buffer_switch_old_start: Populate old subbuffer header.
 959  *
 960  * Only executed when the buffer is finalized, in SWITCH_FLUSH.
 961  */
 962 static
 963 void lib_ring_buffer_switch_old_start(struct lib_ring_buffer *buf,
 964                                       struct channel *chan,
 965                                       struct switch_offsets *offsets,
 966                                       u64 tsc,
 967                                       struct shm_handle *handle)
 968 {
 969         const struct lib_ring_buffer_config *config = chan->backend.config;
 970         unsigned long oldidx = subbuf_index(offsets->old, chan);
 971         unsigned long commit_count;
 972
 973         config->cb.buffer_begin(buf, tsc, oldidx, handle);
 974
 975         /*
 976          * Order all writes to buffer before the commit count update that will
 977          * determine that the subbuffer is full.
 978          */
 979         cmm_smp_wmb();
 980         v_add(config, config->cb.subbuffer_header_size(),
 981               &shmp_index(handle, buf->commit_hot, oldidx)->cc);
 982         commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, oldidx)->cc);
 983         /* Check if the written buffer has to be delivered */
 984         lib_ring_buffer_check_deliver(config, buf, chan, offsets->old,
 985                                       commit_count, oldidx, handle);
 986         lib_ring_buffer_write_commit_counter(config, buf, chan, oldidx,
 987                                              offsets->old, commit_count,
 988                                              config->cb.subbuffer_header_size(),
 989                                              handle);
 990 }
 991
 992 /*
 993  * lib_ring_buffer_switch_old_end: switch old subbuffer
 994  *
 995  * Note : offset_old should never be 0 here. It is ok, because we never perform
 996  * buffer switch on an empty subbuffer in SWITCH_ACTIVE mode. The caller
 997  * increments the offset_old value when doing a SWITCH_FLUSH on an empty
 998  * subbuffer.
 999  */
1000 static
1001 void lib_ring_buffer_switch_old_end(struct lib_ring_buffer *buf,
1002                                     struct channel *chan,
1003                                     struct switch_offsets *offsets,
1004                                     u64 tsc,
1005                                     struct shm_handle *handle)
1006 {
1007         const struct lib_ring_buffer_config *config = chan->backend.config;
1008         unsigned long oldidx = subbuf_index(offsets->old - 1, chan);
1009         unsigned long commit_count, padding_size, data_size;
1010
1011         data_size = subbuf_offset(offsets->old - 1, chan) + 1;
1012         padding_size = chan->backend.subbuf_size - data_size;
1013         subbuffer_set_data_size(config, &buf->backend, oldidx, data_size,
1014                                 handle);
1015
1016         /*
1017          * Order all writes to buffer before the commit count update that will
1018          * determine that the subbuffer is full.
1019          */
1020         cmm_smp_wmb();
1021         v_add(config, padding_size, &shmp_index(handle, buf->commit_hot, oldidx)->cc);
1022         commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, oldidx)->cc);
1023         lib_ring_buffer_check_deliver(config, buf, chan, offsets->old - 1,
1024                                       commit_count, oldidx, handle);
1025         lib_ring_buffer_write_commit_counter(config, buf, chan, oldidx,
1026                                              offsets->old, commit_count,
1027                                              padding_size, handle);
1028 }
1029
1030 /*
1031  * lib_ring_buffer_switch_new_start: Populate new subbuffer.
1032  *
1033  * This code can be executed unordered : writers may already have written to the
1034  * sub-buffer before this code gets executed, caution.  The commit makes sure
1035  * that this code is executed before the deliver of this sub-buffer.
1036  */
1037 static
1038 void lib_ring_buffer_switch_new_start(struct lib_ring_buffer *buf,
1039                                       struct channel *chan,
1040                                       struct switch_offsets *offsets,
1041                                       u64 tsc,
1042                                       struct shm_handle *handle)
1043 {
1044         const struct lib_ring_buffer_config *config = chan->backend.config;
1045         unsigned long beginidx = subbuf_index(offsets->begin, chan);
1046         unsigned long commit_count;
1047
1048         config->cb.buffer_begin(buf, tsc, beginidx, handle);
1049
1050         /*
1051          * Order all writes to buffer before the commit count update that will
1052          * determine that the subbuffer is full.
1053          */
1054         cmm_smp_wmb();
1055         v_add(config, config->cb.subbuffer_header_size(),
1056               &shmp_index(handle, buf->commit_hot, beginidx)->cc);
1057         commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, beginidx)->cc);
1058         /* Check if the written buffer has to be delivered */
1059         lib_ring_buffer_check_deliver(config, buf, chan, offsets->begin,
1060                                       commit_count, beginidx, handle);
1061         lib_ring_buffer_write_commit_counter(config, buf, chan, beginidx,
1062                                              offsets->begin, commit_count,
1063                                              config->cb.subbuffer_header_size(),
1064                                              handle);
1065 }
1066
1067 /*
1068  * lib_ring_buffer_switch_new_end: finish switching current subbuffer
1069  *
1070  * The only remaining threads could be the ones with pending commits. They will
1071  * have to do the deliver themselves.
1072  */
1073 static
1074 void lib_ring_buffer_switch_new_end(struct lib_ring_buffer *buf,
1075                                     struct channel *chan,
1076                                     struct switch_offsets *offsets,
1077                                     u64 tsc,
1078                                     struct shm_handle *handle)
1079 {
1080         const struct lib_ring_buffer_config *config = chan->backend.config;
1081         unsigned long endidx = subbuf_index(offsets->end - 1, chan);
1082         unsigned long commit_count, padding_size, data_size;
1083
1084         data_size = subbuf_offset(offsets->end - 1, chan) + 1;
1085         padding_size = chan->backend.subbuf_size - data_size;
1086         subbuffer_set_data_size(config, &buf->backend, endidx, data_size,
1087                                 handle);
1088
1089         /*
1090          * Order all writes to buffer before the commit count update that will
1091          * determine that the subbuffer is full.
1092          */
1093         cmm_smp_wmb();
1094         v_add(config, padding_size, &shmp_index(handle, buf->commit_hot, endidx)->cc);
1095         commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, endidx)->cc);
1096         lib_ring_buffer_check_deliver(config, buf, chan, offsets->end - 1,
1097                                   commit_count, endidx, handle);
1098         lib_ring_buffer_write_commit_counter(config, buf, chan, endidx,
1099                                              offsets->end, commit_count,
1100                                              padding_size, handle);
1101 }
1102
1103 /*
1104  * Returns :
1105  * 0 if ok
1106  * !0 if execution must be aborted.
1107  */
1108 static
1109 int lib_ring_buffer_try_switch_slow(enum switch_mode mode,
1110                                     struct lib_ring_buffer *buf,
1111                                     struct channel *chan,
1112                                     struct switch_offsets *offsets,
1113                                     u64 *tsc)
1114 {
1115         const struct lib_ring_buffer_config *config = chan->backend.config;
1116         unsigned long off;
1117
1118         offsets->begin = v_read(config, &buf->offset);
1119         offsets->old = offsets->begin;
1120         offsets->switch_old_start = 0;
1121         off = subbuf_offset(offsets->begin, chan);
1122
1123         *tsc = config->cb.ring_buffer_clock_read(chan);
1124
1125         /*
1126          * Ensure we flush the header of an empty subbuffer when doing the
1127          * finalize (SWITCH_FLUSH). This ensures that we end up knowing the
1128          * total data gathering duration even if there were no records saved
1129          * after the last buffer switch.
1130          * In SWITCH_ACTIVE mode, switch the buffer when it contains events.
1131          * SWITCH_ACTIVE only flushes the current subbuffer, dealing with end of
1132          * subbuffer header as appropriate.
1133          * The next record that reserves space will be responsible for
1134          * populating the following subbuffer header. We choose not to populate
1135          * the next subbuffer header here because we want to be able to use
1136          * SWITCH_ACTIVE for periodical buffer flush, which must
1137          * guarantee that all the buffer content (records and header
1138          * timestamps) are visible to the reader. This is required for
1139          * quiescence guarantees for the fusion merge.
1140          */
1141         if (mode == SWITCH_FLUSH || off > 0) {
1142                 if (unlikely(off == 0)) {
1143                         /*
1144                          * The client does not save any header information.
1145                          * Don't switch empty subbuffer on finalize, because it
1146                          * is invalid to deliver a completely empty subbuffer.
1147                          */
1148                         if (!config->cb.subbuffer_header_size())
1149                                 return -1;
1150                         /*
1151                          * Need to write the subbuffer start header on finalize.
1152                          */
1153                         offsets->switch_old_start = 1;
1154                 }
1155                 offsets->begin = subbuf_align(offsets->begin, chan);
1156         } else
1157                 return -1;      /* we do not have to switch : buffer is empty */
1158         /* Note: old points to the next subbuf at offset 0 */
1159         offsets->end = offsets->begin;
1160         return 0;
1161 }
1162
1163 /*
1164  * Force a sub-buffer switch. This operation is completely reentrant : can be
1165  * called while tracing is active with absolutely no lock held.
1166  *
1167  * Note, however, that as a v_cmpxchg is used for some atomic
1168  * operations, this function must be called from the CPU which owns the buffer
1169  * for a ACTIVE flush.
1170  */
1171 void lib_ring_buffer_switch_slow(struct lib_ring_buffer *buf, enum switch_mode mode,
1172                                  struct shm_handle *handle)
1173 {
1174         struct channel *chan = shmp(handle, buf->backend.chan);
1175         const struct lib_ring_buffer_config *config = chan->backend.config;
1176         struct switch_offsets offsets;
1177         unsigned long oldidx;
1178         u64 tsc;
1179
1180         offsets.size = 0;
1181
1182         /*
1183          * Perform retryable operations.
1184          */
1185         do {
1186                 if (lib_ring_buffer_try_switch_slow(mode, buf, chan, &offsets,
1187                                                     &tsc))
1188                         return; /* Switch not needed */
1189         } while (v_cmpxchg(config, &buf->offset, offsets.old, offsets.end)
1190                  != offsets.old);
1191
1192         /*
1193          * Atomically update last_tsc. This update races against concurrent
1194          * atomic updates, but the race will always cause supplementary full TSC
1195          * records, never the opposite (missing a full TSC record when it would
1196          * be needed).
1197          */
1198         save_last_tsc(config, buf, tsc);
1199
1200         /*
1201          * Push the reader if necessary
1202          */
1203         lib_ring_buffer_reserve_push_reader(buf, chan, offsets.old);
1204
1205         oldidx = subbuf_index(offsets.old, chan);
1206         lib_ring_buffer_clear_noref(config, &buf->backend, oldidx, handle);
1207
1208         /*
1209          * May need to populate header start on SWITCH_FLUSH.
1210          */
1211         if (offsets.switch_old_start) {
1212                 lib_ring_buffer_switch_old_start(buf, chan, &offsets, tsc, handle);
1213                 offsets.old += config->cb.subbuffer_header_size();
1214         }
1215
1216         /*
1217          * Switch old subbuffer.
1218          */
1219         lib_ring_buffer_switch_old_end(buf, chan, &offsets, tsc, handle);
1220 }
1221
1222 /*
1223  * Returns :
1224  * 0 if ok
1225  * -ENOSPC if event size is too large for packet.
1226  * -ENOBUFS if there is currently not enough space in buffer for the event.
1227  * -EIO if data cannot be written into the buffer for any other reason.
1228  */
1229 static
1230 int lib_ring_buffer_try_reserve_slow(struct lib_ring_buffer *buf,
1231                                      struct channel *chan,
1232                                      struct switch_offsets *offsets,
1233                                      struct lib_ring_buffer_ctx *ctx)
1234 {
1235         const struct lib_ring_buffer_config *config = chan->backend.config;
1236         struct shm_handle *handle = ctx->handle;
1237         unsigned long reserve_commit_diff;
1238
1239         offsets->begin = v_read(config, &buf->offset);
1240         offsets->old = offsets->begin;
1241         offsets->switch_new_start = 0;
1242         offsets->switch_new_end = 0;
1243         offsets->switch_old_end = 0;
1244         offsets->pre_header_padding = 0;
1245
1246         ctx->tsc = config->cb.ring_buffer_clock_read(chan);
1247         if ((int64_t) ctx->tsc == -EIO)
1248                 return -EIO;
1249
1250         if (last_tsc_overflow(config, buf, ctx->tsc))
1251                 ctx->rflags |= RING_BUFFER_RFLAG_FULL_TSC;
1252
1253         if (unlikely(subbuf_offset(offsets->begin, ctx->chan) == 0)) {
1254                 offsets->switch_new_start = 1;          /* For offsets->begin */
1255         } else {
1256                 offsets->size = config->cb.record_header_size(config, chan,
1257                                                 offsets->begin,
1258                                                 &offsets->pre_header_padding,
1259                                                 ctx);
1260                 offsets->size +=
1261                         lib_ring_buffer_align(offsets->begin + offsets->size,
1262                                               ctx->largest_align)
1263                         + ctx->data_size;
1264                 if (unlikely(subbuf_offset(offsets->begin, chan) +
1265                              offsets->size > chan->backend.subbuf_size)) {
1266                         offsets->switch_old_end = 1;    /* For offsets->old */
1267                         offsets->switch_new_start = 1;  /* For offsets->begin */
1268                 }
1269         }
1270         if (unlikely(offsets->switch_new_start)) {
1271                 unsigned long sb_index;
1272
1273                 /*
1274                  * We are typically not filling the previous buffer completely.
1275                  */
1276                 if (likely(offsets->switch_old_end))
1277                         offsets->begin = subbuf_align(offsets->begin, chan);
1278                 offsets->begin = offsets->begin
1279                                  + config->cb.subbuffer_header_size();
1280                 /* Test new buffer integrity */
1281                 sb_index = subbuf_index(offsets->begin, chan);
1282                 reserve_commit_diff =
1283                   (buf_trunc(offsets->begin, chan)
1284                    >> chan->backend.num_subbuf_order)
1285                   - ((unsigned long) v_read(config,
1286                                             &shmp_index(handle, buf->commit_cold, sb_index)->cc_sb)
1287                      & chan->commit_count_mask);
1288                 if (likely(reserve_commit_diff == 0)) {
1289                         /* Next subbuffer not being written to. */
1290                         if (unlikely(config->mode != RING_BUFFER_OVERWRITE &&
1291                                 subbuf_trunc(offsets->begin, chan)
1292                                  - subbuf_trunc((unsigned long)
1293                                      uatomic_read(&buf->consumed), chan)
1294                                 >= chan->backend.buf_size)) {
1295                                 /*
1296                                  * We do not overwrite non consumed buffers
1297                                  * and we are full : record is lost.
1298                                  */
1299                                 v_inc(config, &buf->records_lost_full);
1300                                 return -ENOBUFS;
1301                         } else {
1302                                 /*
1303                                  * Next subbuffer not being written to, and we
1304                                  * are either in overwrite mode or the buffer is
1305                                  * not full. It's safe to write in this new
1306                                  * subbuffer.
1307                                  */
1308                         }
1309                 } else {
1310                         /*
1311                          * Next subbuffer reserve offset does not match the
1312                          * commit offset. Drop record in producer-consumer and
1313                          * overwrite mode. Caused by either a writer OOPS or too
1314                          * many nested writes over a reserve/commit pair.
1315                          */
1316                         v_inc(config, &buf->records_lost_wrap);
1317                         return -EIO;
1318                 }
1319                 offsets->size =
1320                         config->cb.record_header_size(config, chan,
1321                                                 offsets->begin,
1322                                                 &offsets->pre_header_padding,
1323                                                 ctx);
1324                 offsets->size +=
1325                         lib_ring_buffer_align(offsets->begin + offsets->size,
1326                                               ctx->largest_align)
1327                         + ctx->data_size;
1328                 if (unlikely(subbuf_offset(offsets->begin, chan)
1329                              + offsets->size > chan->backend.subbuf_size)) {
1330                         /*
1331                          * Record too big for subbuffers, report error, don't
1332                          * complete the sub-buffer switch.
1333                          */
1334                         v_inc(config, &buf->records_lost_big);
1335                         return -ENOSPC;
1336                 } else {
1337                         /*
1338                          * We just made a successful buffer switch and the
1339                          * record fits in the new subbuffer. Let's write.
1340                          */
1341                 }
1342         } else {
1343                 /*
1344                  * Record fits in the current buffer and we are not on a switch
1345                  * boundary. It's safe to write.
1346                  */
1347         }
1348         offsets->end = offsets->begin + offsets->size;
1349
1350         if (unlikely(subbuf_offset(offsets->end, chan) == 0)) {
1351                 /*
1352                  * The offset_end will fall at the very beginning of the next
1353                  * subbuffer.
1354                  */
1355                 offsets->switch_new_end = 1;    /* For offsets->begin */
1356         }
1357         return 0;
1358 }
1359
1360 /**
1361  * lib_ring_buffer_reserve_slow - Atomic slot reservation in a buffer.
1362  * @ctx: ring buffer context.
1363  *
1364  * Return : -NOBUFS if not enough space, -ENOSPC if event size too large,
1365  * -EIO for other errors, else returns 0.
1366  * It will take care of sub-buffer switching.
1367  */
1368 int lib_ring_buffer_reserve_slow(struct lib_ring_buffer_ctx *ctx)
1369 {
1370         struct channel *chan = ctx->chan;
1371         struct shm_handle *handle = ctx->handle;
1372         const struct lib_ring_buffer_config *config = chan->backend.config;
1373         struct lib_ring_buffer *buf;
1374         struct switch_offsets offsets;
1375         int ret;
1376
1377         if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
1378                 buf = shmp(handle, chan->backend.buf[ctx->cpu].shmp);
1379         else
1380                 buf = shmp(handle, chan->backend.buf[0].shmp);
1381         ctx->buf = buf;
1382
1383         offsets.size = 0;
1384
1385         do {
1386                 ret = lib_ring_buffer_try_reserve_slow(buf, chan, &offsets,
1387                                                        ctx);
1388                 if (unlikely(ret))
1389                         return ret;
1390         } while (unlikely(v_cmpxchg(config, &buf->offset, offsets.old,
1391                                     offsets.end)
1392                           != offsets.old));
1393
1394         /*
1395          * Atomically update last_tsc. This update races against concurrent
1396          * atomic updates, but the race will always cause supplementary full TSC
1397          * records, never the opposite (missing a full TSC record when it would
1398          * be needed).
1399          */
1400         save_last_tsc(config, buf, ctx->tsc);
1401
1402         /*
1403          * Push the reader if necessary
1404          */
1405         lib_ring_buffer_reserve_push_reader(buf, chan, offsets.end - 1);
1406
1407         /*
1408          * Clear noref flag for this subbuffer.
1409          */
1410         lib_ring_buffer_clear_noref(config, &buf->backend,
1411                                     subbuf_index(offsets.end - 1, chan),
1412                                     handle);
1413
1414         /*
1415          * Switch old subbuffer if needed.
1416          */
1417         if (unlikely(offsets.switch_old_end)) {
1418                 lib_ring_buffer_clear_noref(config, &buf->backend,
1419                                             subbuf_index(offsets.old - 1, chan),
1420                                             handle);
1421                 lib_ring_buffer_switch_old_end(buf, chan, &offsets, ctx->tsc, handle);
1422         }
1423
1424         /*
1425          * Populate new subbuffer.
1426          */
1427         if (unlikely(offsets.switch_new_start))
1428                 lib_ring_buffer_switch_new_start(buf, chan, &offsets, ctx->tsc, handle);
1429
1430         if (unlikely(offsets.switch_new_end))
1431                 lib_ring_buffer_switch_new_end(buf, chan, &offsets, ctx->tsc, handle);
1432
1433         ctx->slot_size = offsets.size;
1434         ctx->pre_offset = offsets.begin;
1435         ctx->buf_offset = offsets.begin + offsets.pre_header_padding;
1436         return 0;
1437 }