Ring buffer: use shmp (shared-memory pointers) for per-channel shm structures
author     Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
           Mon, 4 Jul 2011 17:03:45 +0000 (13:03 -0400)
committer  Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
           Mon, 4 Jul 2011 17:03:45 +0000 (13:03 -0400)
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
21 files changed:
.gitignore
include/ust/bug.h
include/ust/core.h
include/usterr_signal_safe.h
libringbuffer/Makefile.am
libringbuffer/backend.h
libringbuffer/backend_internal.h
libringbuffer/backend_types.h
libringbuffer/config.h [new file with mode: 0644]
libringbuffer/frontend.h
libringbuffer/frontend_internal.h
libringbuffer/frontend_types.h
libringbuffer/iterator.h [deleted file]
libringbuffer/ring_buffer_abi.c
libringbuffer/ring_buffer_backend.c
libringbuffer/ring_buffer_frontend.c
libringbuffer/shm.h [new file with mode: 0644]
libringbuffer/smp.c [new file with mode: 0644]
libringbuffer/smp.h [new file with mode: 0644]
libust/buffers.c
libust/tracer.h
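
Note: the new libringbuffer/shm.h is listed above but its contents are not part of this excerpt; the hunks below only use DECLARE_SHMP(), shmp(), set_shmp() and zalloc_shm() from it. As a rough, hypothetical sketch of how an offset-based shared-memory reference of this kind can be laid out (all names and the layout below are assumptions for illustration, not this commit's actual API):

	#include <stddef.h>

	/*
	 * Sketch only: an offset relative to the start of the shm segment stays
	 * valid in every process mapping that segment, wherever it is mapped.
	 */
	struct shm_header {
		size_t shm_size;	/* total size of the shared memory area */
		size_t shm_allocated;	/* bytes handed out so far */
	};

	/* Hypothetical offset-based reference and accessors (not this commit's API). */
	#define DECLARE_SHMREF(type, name)	size_t name	/* offset of a 'type' object */
	#define shmref_deref(hdr, off)		((void *) ((char *) (hdr) + (off)))
	#define shmref_set(hdr, ref, ptr)	((ref) = (size_t) ((char *) (ptr) - (char *) (hdr)))

	/* Hypothetical bump allocator carving zeroed objects out of the segment. */
	static inline void *shm_zalloc(struct shm_header *hdr, size_t len)
	{
		char *p;

		if (hdr->shm_allocated + len > hdr->shm_size)
			return NULL;
		p = (char *) hdr + hdr->shm_allocated;
		hdr->shm_allocated += len;
		return p;	/* segment assumed zero-filled at creation */
	}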

index e9cac90c5713cc674889979f3261595c04661036..bea8aa397e7884ff642e2d684b42d41ba6529320 100644 (file)
@@ -15,7 +15,8 @@ Makefile.in
 configure
 aclocal.m4
 autom4te.cache/
-config.h
+/config.h
+/include/ust/config.h
 /config.h.in
 config/
 config.log
index 8243cc94a7b7447531cc7d7596400a0f63038f3b..96007c3fb777a5c072f4b6db08151f99dd196026 100644 (file)
@@ -9,6 +9,9 @@
  * Dual LGPL v2.1/GPL v2 license.
  */
 
+#define BUILD_BUG_ON(condition)                                        \
+       ((void) sizeof(char[-!!(condition)]))
+
 /**
  * BUILD_RUNTIME_BUG_ON - check condition at build (if constant) or runtime
  * @condition: the condition which should be false.
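
The BUILD_BUG_ON() macro added above rejects a compile-time-true condition by sizing an array negatively, so the check costs nothing at runtime. A minimal usage sketch (the struct is hypothetical; only the macro comes from this change):

	#include <ust/bug.h>

	struct record_header {			/* hypothetical layout under test */
		unsigned int id;
		unsigned long timestamp;
	};

	static inline void check_layout(void)
	{
		/* No runtime effect when false; fails the build if the struct grows past 64 bytes. */
		BUILD_BUG_ON(sizeof(struct record_header) > 64);
	}
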
index 8c1c490eedde18b0615741ee1ec1a9773c0589ab..f54ea3f277cbb296633f8edcb53d4345b5a56cac 100644 (file)
@@ -20,6 +20,7 @@
 
 #include <sys/types.h>
 #include <ust/config.h>
+#include <urcu/arch.h>
 
 #define likely(x)      __builtin_expect(!!(x), 1)
 #define unlikely(x)    __builtin_expect(!!(x), 0)
index f12c317ca6be71c4038d851360062a4c206d9df1..10355dcba849aac3682d162fcdf1051dccdc5da1 100644 (file)
@@ -62,7 +62,7 @@ static inline void __attribute__ ((format (printf, 1, 2)))
        ust_safe_snprintf(____buf, sizeof(____buf), fmt, ## args); \
 \
        /* Add end of string in case of buffer overflow. */ \
-       ____buf[sizeof(____buf)-1] = 0; \
+       ____buf[sizeof(____buf) - 1] = 0; \
 \
        patient_write(STDERR_FILENO, ____buf, strlen(____buf)); \
        /* Can't print errors because we are in the error printing code path. */ \
index 226605337268dcff8e2d817ca07eb4c5fdf2c707..ffa37017c44b077d40049649e180db8490e251e5 100644 (file)
@@ -3,10 +3,13 @@ AM_CFLAGS = -fno-strict-aliasing
 
 lib_LTLIBRARIES = libringbuffer.la
 
+noinst_HEADERS = \
+       smp.h
+
 libringbuffer_la_SOURCES = \
+       smp.c \
        ring_buffer_backend.c \
-       ring_buffer_frontend.c \
-       ring_buffer_abi.c
+       ring_buffer_frontend.c
 
 libringbuffer_la_LDFLAGS = -no-undefined -version-info 0:0:0
 
index 61d2f3277f3192765b14b51f8175d40e1b8a8d42..1bd61109ca0096e2685b864324449b5760dc3128 100644 (file)
@@ -32,10 +32,6 @@ extern size_t lib_ring_buffer_read(struct lib_ring_buffer_backend *bufb,
 extern int lib_ring_buffer_read_cstr(struct lib_ring_buffer_backend *bufb,
                                     size_t offset, void *dest, size_t len);
 
-extern struct page **
-lib_ring_buffer_read_get_page(struct lib_ring_buffer_backend *bufb, size_t offset,
-                             void ***virt);
-
 /*
  * Return the address where a given offset is located.
  * Should be used to get the current subbuffer header pointer. Given we know
@@ -68,29 +64,27 @@ void lib_ring_buffer_write(const struct lib_ring_buffer_config *config,
 {
        struct lib_ring_buffer_backend *bufb = &ctx->buf->backend;
        struct channel_backend *chanb = &ctx->chan->backend;
-       size_t sbidx, index;
+       size_t sbidx;
        size_t offset = ctx->buf_offset;
-       ssize_t pagecpy;
        struct lib_ring_buffer_backend_pages *rpages;
        unsigned long sb_bindex, id;
 
        offset &= chanb->buf_size - 1;
        sbidx = offset >> chanb->subbuf_size_order;
-       index = (offset & (chanb->subbuf_size - 1)) >> get_count_order(PAGE_SIZE);
-       pagecpy = min_t(size_t, len, (-offset) & ~PAGE_MASK);
-       id = bufb->buf_wsb[sbidx].id;
+       id = shmp(bufb->buf_wsb)[sbidx].id;
        sb_bindex = subbuffer_id_get_index(config, id);
-       rpages = bufb->array[sb_bindex];
+       rpages = shmp(bufb->array)[sb_bindex];
        CHAN_WARN_ON(ctx->chan,
                     config->mode == RING_BUFFER_OVERWRITE
                     && subbuffer_id_is_noref(config, id));
-       if (likely(pagecpy == len))
-               lib_ring_buffer_do_copy(config,
-                                       rpages->p[index].virt
-                                           + (offset & ~PAGE_MASK),
-                                       src, len);
-       else
-               _lib_ring_buffer_write(bufb, offset, src, len, 0);
+       /*
+        * Underlying layer should never ask for writes across
+        * subbuffers.
+        */
+       CHAN_WARN_ON(chanb, offset >= chanb->buf_size);
+       lib_ring_buffer_do_copy(config,
+                               shmp(rpages->p) + (offset & ~(chanb->subbuf_size - 1)),
+                               src, len);
        ctx->buf_offset += len;
 }
 
@@ -109,16 +103,16 @@ unsigned long lib_ring_buffer_get_records_unread(
        unsigned long records_unread = 0, sb_bindex, id;
        unsigned int i;
 
-       for (i = 0; i < bufb->chan->backend.num_subbuf; i++) {
-               id = bufb->buf_wsb[i].id;
+       for (i = 0; i < shmp(bufb->chan)->backend.num_subbuf; i++) {
+               id = shmp(bufb->buf_wsb)[i].id;
                sb_bindex = subbuffer_id_get_index(config, id);
-               pages = bufb->array[sb_bindex];
+               pages = shmp(bufb->array)[sb_bindex];
                records_unread += v_read(config, &pages->records_unread);
        }
        if (config->mode == RING_BUFFER_OVERWRITE) {
                id = bufb->buf_rsb.id;
                sb_bindex = subbuffer_id_get_index(config, id);
-               pages = bufb->array[sb_bindex];
+               pages = shmp(bufb->array)[sb_bindex];
                records_unread += v_read(config, &pages->records_unread);
        }
        return records_unread;
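
A small standalone sketch of the offset-to-sub-buffer index computation that lib_ring_buffer_write() above performs before looking up shmp(bufb->buf_wsb)[sbidx].id; the buffer and sub-buffer sizes are hypothetical, not taken from this commit:

	#include <assert.h>
	#include <stddef.h>

	int main(void)
	{
		size_t buf_size = 1UL << 22;		/* 4 MiB channel buffer (assumed) */
		unsigned int subbuf_size_order = 20;	/* 1 MiB sub-buffers (assumed) */
		size_t offset = 0x2A0000;		/* current write position */
		size_t sbidx;

		offset &= buf_size - 1;			/* wrap within the buffer */
		sbidx = offset >> subbuf_size_order;	/* sub-buffer holding the record */
		assert(sbidx == 2);			/* 0x2A0000 falls in the third sub-buffer */
		return 0;
	}

The resulting sbidx selects the write-side sub-buffer entry, whose id then indexes the backend pages array for the actual copy.
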
index c5f3362e53ed03c21267e635e5242a6c7299ccec..182ac97c6d72d3163d2c88be9f7a312d640f9473 100644 (file)
 #include "config.h"
 #include "backend_types.h"
 #include "frontend_types.h"
+#include "shm.h"
 
 /* Ring buffer backend API presented to the frontend */
 
 /* Ring buffer and channel backend create/free */
 
 int lib_ring_buffer_backend_create(struct lib_ring_buffer_backend *bufb,
-                                  struct channel_backend *chan, int cpu);
+                                  struct channel_backend *chan, int cpu,
+                                  struct shm_header *shm_header);
 void channel_backend_unregister_notifiers(struct channel_backend *chanb);
 void lib_ring_buffer_backend_free(struct lib_ring_buffer_backend *bufb);
 int channel_backend_init(struct channel_backend *chanb,
                         const char *name,
                         const struct lib_ring_buffer_config *config,
                         void *priv, size_t subbuf_size,
-                        size_t num_subbuf);
+                        size_t num_subbuf, struct shm_header *shm_header);
 void channel_backend_free(struct channel_backend *chanb);
 
 void lib_ring_buffer_backend_reset(struct lib_ring_buffer_backend *bufb);
@@ -185,8 +187,8 @@ void subbuffer_count_record(const struct lib_ring_buffer_config *config,
 {
        unsigned long sb_bindex;
 
-       sb_bindex = subbuffer_id_get_index(config, bufb->buf_wsb[idx].id);
-       v_inc(config, &bufb->array[sb_bindex]->records_commit);
+       sb_bindex = subbuffer_id_get_index(config, shmp(bufb->buf_wsb)[idx].id);
+       v_inc(config, &shmp(bufb->array)[sb_bindex]->records_commit);
 }
 
 /*
@@ -201,9 +203,9 @@ void subbuffer_consume_record(const struct lib_ring_buffer_config *config,
 
        sb_bindex = subbuffer_id_get_index(config, bufb->buf_rsb.id);
        CHAN_WARN_ON(bufb->chan,
-                    !v_read(config, &bufb->array[sb_bindex]->records_unread));
+                    !v_read(config, &shmp(bufb->array)[sb_bindex]->records_unread));
        /* Non-atomic decrement protected by exclusive subbuffer access */
-       _v_dec(config, &bufb->array[sb_bindex]->records_unread);
+       _v_dec(config, &shmp(bufb->array)[sb_bindex]->records_unread);
        v_inc(config, &bufb->records_read);
 }
 
@@ -215,8 +217,8 @@ unsigned long subbuffer_get_records_count(
 {
        unsigned long sb_bindex;
 
-       sb_bindex = subbuffer_id_get_index(config, bufb->buf_wsb[idx].id);
-       return v_read(config, &bufb->array[sb_bindex]->records_commit);
+       sb_bindex = subbuffer_id_get_index(config, shmp(bufb->buf_wsb)[idx].id);
+       return v_read(config, &shmp(bufb->array)[sb_bindex]->records_commit);
 }
 
 /*
@@ -234,8 +236,8 @@ unsigned long subbuffer_count_records_overrun(
        struct lib_ring_buffer_backend_pages *pages;
        unsigned long overruns, sb_bindex;
 
-       sb_bindex = subbuffer_id_get_index(config, bufb->buf_wsb[idx].id);
-       pages = bufb->array[sb_bindex];
+       sb_bindex = subbuffer_id_get_index(config, shmp(bufb->buf_wsb)[idx].id);
+       pages = shmp(bufb->array)[sb_bindex];
        overruns = v_read(config, &pages->records_unread);
        v_set(config, &pages->records_unread,
              v_read(config, &pages->records_commit));
@@ -253,8 +255,8 @@ void subbuffer_set_data_size(const struct lib_ring_buffer_config *config,
        struct lib_ring_buffer_backend_pages *pages;
        unsigned long sb_bindex;
 
-       sb_bindex = subbuffer_id_get_index(config, bufb->buf_wsb[idx].id);
-       pages = bufb->array[sb_bindex];
+       sb_bindex = subbuffer_id_get_index(config, shmp(bufb->buf_wsb)[idx].id);
+       pages = shmp(bufb->array)[sb_bindex];
        pages->data_size = data_size;
 }
 
@@ -267,7 +269,7 @@ unsigned long subbuffer_get_read_data_size(
        unsigned long sb_bindex;
 
        sb_bindex = subbuffer_id_get_index(config, bufb->buf_rsb.id);
-       pages = bufb->array[sb_bindex];
+       pages = shmp(bufb->array)[sb_bindex];
        return pages->data_size;
 }
 
@@ -280,8 +282,8 @@ unsigned long subbuffer_get_data_size(
        struct lib_ring_buffer_backend_pages *pages;
        unsigned long sb_bindex;
 
-       sb_bindex = subbuffer_id_get_index(config, bufb->buf_wsb[idx].id);
-       pages = bufb->array[sb_bindex];
+       sb_bindex = subbuffer_id_get_index(config, shmp(bufb->buf_wsb)[idx].id);
+       pages = shmp(bufb->array)[sb_bindex];
        return pages->data_size;
 }
 
@@ -303,7 +305,7 @@ void lib_ring_buffer_clear_noref(const struct lib_ring_buffer_config *config,
         * Performing a volatile access to read the sb_pages, because we want to
         * read a coherent version of the pointer and the associated noref flag.
         */
-       id = CMM_ACCESS_ONCE(bufb->buf_wsb[idx].id);
+       id = CMM_ACCESS_ONCE(shmp(bufb->buf_wsb)[idx].id);
        for (;;) {
                /* This check is called on the fast path for each record. */
                if (likely(!subbuffer_id_is_noref(config, id))) {
@@ -317,7 +319,7 @@ void lib_ring_buffer_clear_noref(const struct lib_ring_buffer_config *config,
                }
                new_id = id;
                subbuffer_id_clear_noref(config, &new_id);
-               new_id = uatomic_cmpxchg(&bufb->buf_wsb[idx].id, id, new_id);
+               new_id = uatomic_cmpxchg(&shmp(bufb->buf_wsb)[idx].id, id, new_id);
                if (likely(new_id == id))
                        break;
                id = new_id;
@@ -348,13 +350,13 @@ void lib_ring_buffer_set_noref_offset(const struct lib_ring_buffer_config *confi
         * readers of the noref flag.
         */
        CHAN_WARN_ON(bufb->chan,
-                    subbuffer_id_is_noref(config, bufb->buf_wsb[idx].id));
+                    subbuffer_id_is_noref(config, shmp(bufb->buf_wsb)[idx].id));
        /*
         * Memory barrier that ensures counter stores are ordered before set
         * noref and offset.
         */
        cmm_smp_mb();
-       subbuffer_id_set_noref_offset(config, &bufb->buf_wsb[idx].id, offset);
+       subbuffer_id_set_noref_offset(config, &shmp(bufb->buf_wsb)[idx].id, offset);
 }
 
 /**
@@ -376,7 +378,7 @@ int update_read_sb_index(const struct lib_ring_buffer_config *config,
                 * old_wpage, because the value read will be confirmed by the
                 * following cmpxchg().
                 */
-               old_id = bufb->buf_wsb[consumed_idx].id;
+               old_id = shmp(bufb->buf_wsb)[consumed_idx].id;
                if (unlikely(!subbuffer_id_is_noref(config, old_id)))
                        return -EAGAIN;
                /*
@@ -390,14 +392,14 @@ int update_read_sb_index(const struct lib_ring_buffer_config *config,
                             !subbuffer_id_is_noref(config, bufb->buf_rsb.id));
                subbuffer_id_set_noref_offset(config, &bufb->buf_rsb.id,
                                              consumed_count);
-               new_id = uatomic_cmpxchg(&bufb->buf_wsb[consumed_idx].id, old_id,
+               new_id = uatomic_cmpxchg(&shmp(bufb->buf_wsb)[consumed_idx].id, old_id,
                                 bufb->buf_rsb.id);
                if (unlikely(old_id != new_id))
                        return -EAGAIN;
                bufb->buf_rsb.id = new_id;
        } else {
                /* No page exchange, use the writer page directly */
-               bufb->buf_rsb.id = bufb->buf_wsb[consumed_idx].id;
+               bufb->buf_rsb.id = shmp(bufb->buf_wsb)[consumed_idx].id;
        }
        return 0;
 }
index cfbe59c3cdedefb091148f648671bd35972c01c0..3bc36ba81ee896fbd295e5973b6f9cbe052cdbfe 100644 (file)
  * Dual LGPL v2.1/GPL v2 license.
  */
 
-struct lib_ring_buffer_backend_page {
-       void *virt;                     /* page virtual address (cached) */
-       struct page *page;              /* pointer to page structure */
-};
+#include "shm.h"
 
 struct lib_ring_buffer_backend_pages {
        unsigned long mmap_offset;      /* offset of the subbuffer in mmap */
        union v_atomic records_commit;  /* current records committed count */
        union v_atomic records_unread;  /* records to read */
        unsigned long data_size;        /* Amount of data to read from subbuf */
-       struct lib_ring_buffer_backend_page p[];
+       DECLARE_SHMP(char, p);          /* Backing memory map */
 };
 
 struct lib_ring_buffer_backend_subbuffer {
@@ -37,17 +34,17 @@ struct lib_ring_buffer;
 
 struct lib_ring_buffer_backend {
        /* Array of ring_buffer_backend_subbuffer for writer */
-       struct lib_ring_buffer_backend_subbuffer *buf_wsb;
+       DECLARE_SHMP(struct lib_ring_buffer_backend_subbuffer, buf_wsb);
        /* ring_buffer_backend_subbuffer for reader */
        struct lib_ring_buffer_backend_subbuffer buf_rsb;
        /*
         * Pointer array of backend pages, for whole buffer.
         * Indexed by ring_buffer_backend_subbuffer identifier (id) index.
         */
-       struct lib_ring_buffer_backend_pages **array;
-       unsigned int num_pages_per_subbuf;
+       DECLARE_SHMP(struct lib_ring_buffer_backend_pages *, array);
+       DECLARE_SHMP(char, memory_map); /* memory mapping */
 
-       struct channel *chan;           /* Associated channel */
+       DECLARE_SHMP(struct channel, chan);     /* Associated channel */
        int cpu;                        /* This buffer's cpu. -1 if global. */
        union v_atomic records_read;    /* Number of records read */
        unsigned int allocated:1;       /* Bool: is buffer allocated ? */
@@ -63,8 +60,7 @@ struct channel_backend {
                                         */
        unsigned int buf_size_order;    /* Order of buffer size */
        int extra_reader_sb:1;          /* Bool: has extra reader subbuffer */
-       struct lib_ring_buffer *buf;    /* Channel per-cpu buffers */
-
+       DECLARE_SHMP(struct lib_ring_buffer, buf); /* Channel per-cpu buffers */
        unsigned long num_subbuf;       /* Number of sub-buffers for writer */
        u64 start_tsc;                  /* Channel creation TSC value */
        void *priv;                     /* Client-specific information */
diff --git a/libringbuffer/config.h b/libringbuffer/config.h
new file mode 100644 (file)
index 0000000..900208f
--- /dev/null
+++ b/libringbuffer/config.h
@@ -0,0 +1,298 @@
+#ifndef _LINUX_RING_BUFFER_CONFIG_H
+#define _LINUX_RING_BUFFER_CONFIG_H
+
+/*
+ * linux/ringbuffer/config.h
+ *
+ * Copyright (C) 2010 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * Ring buffer configuration header. Note: after declaring the standard inline
+ * functions, clients should also include linux/ringbuffer/api.h.
+ *
+ * Dual LGPL v2.1/GPL v2 license.
+ */
+
+#include <errno.h>
+#include "ust/kcompat/kcompat.h"
+#include "ust/align.h"
+
+struct lib_ring_buffer;
+struct channel;
+struct lib_ring_buffer_config;
+struct lib_ring_buffer_ctx;
+
+/*
+ * Ring buffer client callbacks. Only used by slow path, never on fast path.
+ * For the fast path, record_header_size(), ring_buffer_clock_read() should be
+ * provided as inline functions too.  These may simply return 0 if not used by
+ * the client.
+ */
+struct lib_ring_buffer_client_cb {
+       /* Mandatory callbacks */
+
+       /* A static inline version is also required for fast path */
+       u64 (*ring_buffer_clock_read) (struct channel *chan);
+       size_t (*record_header_size) (const struct lib_ring_buffer_config *config,
+                                     struct channel *chan, size_t offset,
+                                     size_t *pre_header_padding,
+                                     struct lib_ring_buffer_ctx *ctx);
+
+       /* Slow path only, at subbuffer switch */
+       size_t (*subbuffer_header_size) (void);
+       void (*buffer_begin) (struct lib_ring_buffer *buf, u64 tsc,
+                             unsigned int subbuf_idx);
+       void (*buffer_end) (struct lib_ring_buffer *buf, u64 tsc,
+                           unsigned int subbuf_idx, unsigned long data_size);
+
+       /* Optional callbacks (can be set to NULL) */
+
+       /* Called at buffer creation/finalize */
+       int (*buffer_create) (struct lib_ring_buffer *buf, void *priv,
+                             int cpu, const char *name);
+       /*
+        * Clients should guarantee that no new reader handle can be opened
+        * after finalize.
+        */
+       void (*buffer_finalize) (struct lib_ring_buffer *buf, void *priv, int cpu);
+
+       /*
+        * Extract header length, payload length and timestamp from event
+        * record. Used by buffer iterators. Timestamp is only used by channel
+        * iterator.
+        */
+       void (*record_get) (const struct lib_ring_buffer_config *config,
+                           struct channel *chan, struct lib_ring_buffer *buf,
+                           size_t offset, size_t *header_len,
+                           size_t *payload_len, u64 *timestamp);
+};
+
+/*
+ * Ring buffer instance configuration.
+ *
+ * Declare as "static const" within the client object to ensure the inline fast
+ * paths can be optimized.
+ *
+ * alloc/sync pairs:
+ *
+ * RING_BUFFER_ALLOC_PER_CPU and RING_BUFFER_SYNC_PER_CPU :
+ *   Per-cpu buffers with per-cpu synchronization. Tracing must be performed
+ *   with preemption disabled (lib_ring_buffer_get_cpu() and
+ *   lib_ring_buffer_put_cpu()).
+ *
+ * RING_BUFFER_ALLOC_PER_CPU and RING_BUFFER_SYNC_GLOBAL :
+ *   Per-cpu buffer with global synchronization. Tracing can be performed with
+ *   preemption enabled, statistically stays on the local buffers.
+ *
+ * RING_BUFFER_ALLOC_GLOBAL and RING_BUFFER_SYNC_PER_CPU :
+ *   Should only be used for buffers belonging to a single thread or protected
+ *   by mutual exclusion by the client. Note that periodical sub-buffer switch
+ *   should be disabled in this kind of configuration.
+ *
+ * RING_BUFFER_ALLOC_GLOBAL and RING_BUFFER_SYNC_GLOBAL :
+ *   Global shared buffer with global synchronization.
+ *
+ * wakeup:
+ *
+ * RING_BUFFER_WAKEUP_BY_TIMER uses per-cpu deferrable timers to poll the
+ * buffers and wake up readers if data is ready. Mainly useful for tracers which
+ * don't want to call into the wakeup code on the tracing path. Use in
+ * combination with "read_timer_interval" channel_create() argument.
+ *
+ * RING_BUFFER_WAKEUP_BY_WRITER directly wakes up readers when a subbuffer is
+ * ready to read. Lower latencies before the reader is woken up. Mainly suitable
+ * for drivers.
+ *
+ * RING_BUFFER_WAKEUP_NONE does not perform any wakeup whatsoever. The client
+ * has the responsibility to perform wakeups.
+ */
+struct lib_ring_buffer_config {
+       enum {
+               RING_BUFFER_ALLOC_PER_CPU,
+               RING_BUFFER_ALLOC_GLOBAL,
+       } alloc;
+       enum {
+               RING_BUFFER_SYNC_PER_CPU,       /* Wait-free */
+               RING_BUFFER_SYNC_GLOBAL,        /* Lock-free */
+       } sync;
+       enum {
+               RING_BUFFER_OVERWRITE,          /* Overwrite when buffer full */
+               RING_BUFFER_DISCARD,            /* Discard when buffer full */
+       } mode;
+       enum {
+               RING_BUFFER_SPLICE,
+               RING_BUFFER_MMAP,
+               RING_BUFFER_READ,               /* TODO */
+               RING_BUFFER_ITERATOR,
+               RING_BUFFER_NONE,
+       } output;
+       enum {
+               RING_BUFFER_PAGE,
+               RING_BUFFER_VMAP,               /* TODO */
+               RING_BUFFER_STATIC,             /* TODO */
+       } backend;
+       enum {
+               RING_BUFFER_NO_OOPS_CONSISTENCY,
+               RING_BUFFER_OOPS_CONSISTENCY,
+       } oops;
+       enum {
+               RING_BUFFER_IPI_BARRIER,
+               RING_BUFFER_NO_IPI_BARRIER,
+       } ipi;
+       enum {
+               RING_BUFFER_WAKEUP_BY_TIMER,    /* wake up performed by timer */
+               RING_BUFFER_WAKEUP_BY_WRITER,   /*
+                                                * writer wakes up reader,
+                                                * not lock-free
+                                                * (takes spinlock).
+                                                */
+       } wakeup;
+       /*
+        * tsc_bits: timestamp bits saved at each record.
+        *   0 and 64 disable the timestamp compression scheme.
+        */
+       unsigned int tsc_bits;
+       struct lib_ring_buffer_client_cb cb;
+};
+
+/*
+ * ring buffer context
+ *
+ * Context passed to lib_ring_buffer_reserve(), lib_ring_buffer_commit(),
+ * lib_ring_buffer_try_discard_reserve(), lib_ring_buffer_align_ctx() and
+ * lib_ring_buffer_write().
+ */
+struct lib_ring_buffer_ctx {
+       /* input received by lib_ring_buffer_reserve(), saved here. */
+       struct channel *chan;           /* channel */
+       void *priv;                     /* client private data */
+       size_t data_size;               /* size of payload */
+       int largest_align;              /*
+                                        * alignment of the largest element
+                                        * in the payload
+                                        */
+       int cpu;                        /* processor id */
+
+       /* output from lib_ring_buffer_reserve() */
+       struct lib_ring_buffer *buf;    /*
+                                        * buffer corresponding to processor id
+                                        * for this channel
+                                        */
+       size_t slot_size;               /* size of the reserved slot */
+       unsigned long buf_offset;       /* offset following the record header */
+       unsigned long pre_offset;       /*
+                                        * Initial offset position _before_
+                                        * the record is written. Positioned
+                                        * prior to record header alignment
+                                        * padding.
+                                        */
+       u64 tsc;                        /* time-stamp counter value */
+       unsigned int rflags;            /* reservation flags */
+};
+
+/**
+ * lib_ring_buffer_ctx_init - initialize ring buffer context
+ * @ctx: ring buffer context to initialize
+ * @chan: channel
+ * @priv: client private data
+ * @data_size: size of record data payload
+ * @largest_align: largest alignment within data payload types
+ * @cpu: processor id
+ */
+static inline
+void lib_ring_buffer_ctx_init(struct lib_ring_buffer_ctx *ctx,
+                             struct channel *chan, void *priv,
+                             size_t data_size, int largest_align,
+                             int cpu)
+{
+       ctx->chan = chan;
+       ctx->priv = priv;
+       ctx->data_size = data_size;
+       ctx->largest_align = largest_align;
+       ctx->cpu = cpu;
+       ctx->rflags = 0;
+}
+
+/*
+ * Reservation flags.
+ *
+ * RING_BUFFER_RFLAG_FULL_TSC
+ *
+ * This flag is passed to record_header_size() and to the primitive used to
+ * write the record header. It indicates that the full 64-bit time value is
+ * needed in the record header. If this flag is not set, the record header needs
+ * only to contain "tsc_bits" bit of time value.
+ *
+ * Reservation flags can be added by the client, starting from
+ * "(RING_BUFFER_FLAGS_END << 0)". It can be used to pass information from
+ * record_header_size() to lib_ring_buffer_write_record_header().
+ */
+#define        RING_BUFFER_RFLAG_FULL_TSC              (1U << 0)
+#define RING_BUFFER_RFLAG_END                  (1U << 1)
+
+/*
+ * We need to define RING_BUFFER_ALIGN_ATTR so it is known early at
+ * compile-time. We have to duplicate the "config->align" information and the
+ * definition here because config->align is used both in the slow and fast
+ * paths, but RING_BUFFER_ALIGN_ATTR is only available for the client code.
+ */
+#ifdef RING_BUFFER_ALIGN
+
+# define RING_BUFFER_ALIGN_ATTR                /* Default arch alignment */
+
+/*
+ * Calculate the offset needed to align the type.
+ * size_of_type must be non-zero.
+ */
+static inline
+unsigned int lib_ring_buffer_align(size_t align_drift, size_t size_of_type)
+{
+       return offset_align(align_drift, size_of_type);
+}
+
+#else
+
+# define RING_BUFFER_ALIGN_ATTR __attribute__((packed))
+
+/*
+ * Calculate the offset needed to align the type.
+ * size_of_type must be non-zero.
+ */
+static inline
+unsigned int lib_ring_buffer_align(size_t align_drift, size_t size_of_type)
+{
+       return 0;
+}
+
+#endif
+
+/**
+ * lib_ring_buffer_align_ctx - Align context offset on "alignment"
+ * @ctx: ring buffer context.
+ */
+static inline
+void lib_ring_buffer_align_ctx(struct lib_ring_buffer_ctx *ctx,
+                          size_t alignment)
+{
+       ctx->buf_offset += lib_ring_buffer_align(ctx->buf_offset,
+                                                alignment);
+}
+
+/*
+ * lib_ring_buffer_check_config() returns 0 on success.
+ * Used internally to check for valid configurations at channel creation.
+ */
+static inline
+int lib_ring_buffer_check_config(const struct lib_ring_buffer_config *config,
+                            unsigned int switch_timer_interval,
+                            unsigned int read_timer_interval)
+{
+       if (config->alloc == RING_BUFFER_ALLOC_GLOBAL
+           && config->sync == RING_BUFFER_SYNC_PER_CPU
+           && switch_timer_interval)
+               return -EINVAL;
+       return 0;
+}
+
+#include "vatomic.h"
+
+#endif /* _LINUX_RING_BUFFER_CONFIG_H */
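
As the comment in this new header says, clients declare the configuration as a "static const" object so the inline fast paths can be optimized. A hedged sketch of such a declaration, using only field names and enum values defined above (the client_* names and the stub callback bodies are placeholders, not code from this commit):

	#include "config.h"	/* the libringbuffer/config.h added by this commit */

	static u64 client_clock_read(struct channel *chan)
	{
		return 0;	/* stub */
	}

	static size_t client_record_header_size(const struct lib_ring_buffer_config *config,
						struct channel *chan, size_t offset,
						size_t *pre_header_padding,
						struct lib_ring_buffer_ctx *ctx)
	{
		return 0;	/* stub: no per-record header */
	}

	static size_t client_subbuffer_header_size(void)
	{
		return 0;	/* stub: no sub-buffer header */
	}

	static void client_buffer_begin(struct lib_ring_buffer *buf, u64 tsc,
					unsigned int subbuf_idx)
	{
	}

	static void client_buffer_end(struct lib_ring_buffer *buf, u64 tsc,
				      unsigned int subbuf_idx, unsigned long data_size)
	{
	}

	static const struct lib_ring_buffer_config client_config = {
		.cb.ring_buffer_clock_read = client_clock_read,
		.cb.record_header_size = client_record_header_size,
		.cb.subbuffer_header_size = client_subbuffer_header_size,
		.cb.buffer_begin = client_buffer_begin,
		.cb.buffer_end = client_buffer_end,
		.tsc_bits = 27,			/* timestamp compression width (assumed) */
		.alloc = RING_BUFFER_ALLOC_PER_CPU,
		.sync = RING_BUFFER_SYNC_PER_CPU,
		.mode = RING_BUFFER_OVERWRITE,
		.backend = RING_BUFFER_PAGE,
		.output = RING_BUFFER_MMAP,
		.oops = RING_BUFFER_OOPS_CONSISTENCY,
		.ipi = RING_BUFFER_NO_IPI_BARRIER,
		.wakeup = RING_BUFFER_WAKEUP_BY_TIMER,
	};

With RING_BUFFER_ALLOC_PER_CPU paired with RING_BUFFER_SYNC_PER_CPU, this sketch corresponds to the per-cpu buffers with per-cpu synchronization case documented in the alloc/sync pairing comment above.
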
index 9d73da16fee45b0f28937d86f2a459d04c6767a2..fe301c1b4ee981268fdc8f4e71e8de241ceae012 100644 (file)
@@ -19,6 +19,7 @@
 #include <urcu/compiler.h>
 #include <urcu/uatomic.h>
 
+#include "smp.h"
 /* Internal helpers */
 #include "frontend_internal.h"
 
@@ -41,7 +42,8 @@ struct channel *channel_create(const struct lib_ring_buffer_config *config,
                               void *buf_addr,
                               size_t subbuf_size, size_t num_subbuf,
                               unsigned int switch_timer_interval,
-                              unsigned int read_timer_interval);
+                              unsigned int read_timer_interval,
+                              int *shmid);
 
 /*
  * channel_destroy returns the private data pointer. It finalizes all channel's
@@ -61,9 +63,7 @@ void *channel_destroy(struct channel *chan);
  * only performed at channel destruction.
  */
 #define for_each_channel_cpu(cpu, chan)                                        \
-       for ((cpu) = -1;                                                \
-               ({ (cpu) = cpumask_next(cpu, (chan)->backend.cpumask);  \
-                  cmm_smp_read_barrier_depends(); (cpu) < nr_cpu_ids; });)
+       for_each_possible_cpu(cpu)
 
 extern struct lib_ring_buffer *channel_get_ring_buffer(
                                const struct lib_ring_buffer_config *config,
@@ -104,7 +104,7 @@ static inline void lib_ring_buffer_put_next_subbuf(struct lib_ring_buffer *buf)
 {
        lib_ring_buffer_put_subbuf(buf);
        lib_ring_buffer_move_consumer(buf, subbuf_align(buf->cons_snapshot,
-                                                   buf->backend.chan));
+                                                   shmp(buf->backend.chan)));
 }
 
 extern void channel_reset(struct channel *chan);
index f758a6842257ece1dfaea0bb7a7e93e5d802b7ad..6a1d3a6cb523fd36cf9e69c7d28fb1c4852872e9 100644 (file)
@@ -21,6 +21,7 @@
 #include "config.h"
 #include "backend_types.h"
 #include "frontend_types.h"
+#include "shm.h"
 
 /* Buffer offset macros */
 
@@ -182,7 +183,7 @@ void lib_ring_buffer_vmcore_check_deliver(const struct lib_ring_buffer_config *c
                                          unsigned long idx)
 {
        if (config->oops == RING_BUFFER_OOPS_CONSISTENCY)
-               v_set(config, &buf->commit_hot[idx].seq, commit_count);
+               v_set(config, &shmp(buf->commit_hot)[idx].seq, commit_count);
 }
 
 static inline
@@ -194,7 +195,7 @@ int lib_ring_buffer_poll_deliver(const struct lib_ring_buffer_config *config,
 
        consumed_old = uatomic_read(&buf->consumed);
        consumed_idx = subbuf_index(consumed_old, chan);
-       commit_count = v_read(config, &buf->commit_cold[consumed_idx].cc_sb);
+       commit_count = v_read(config, &shmp(buf->commit_cold)[consumed_idx].cc_sb);
        /*
         * No memory barrier here, since we are only interested
         * in a statistically correct polling result. The next poll will
@@ -269,7 +270,7 @@ int lib_ring_buffer_reserve_committed(const struct lib_ring_buffer_config *confi
        do {
                offset = v_read(config, &buf->offset);
                idx = subbuf_index(offset, chan);
-               commit_count = v_read(config, &buf->commit_hot[idx].cc);
+               commit_count = v_read(config, &shmp(buf->commit_hot)[idx].cc);
        } while (offset != v_read(config, &buf->offset));
 
        return ((buf_trunc(offset, chan) >> chan->backend.num_subbuf_order)
@@ -317,7 +318,7 @@ void lib_ring_buffer_check_deliver(const struct lib_ring_buffer_config *config,
                 * The subbuffer size is least 2 bytes (minimum size: 1 page).
                 * This guarantees that old_commit_count + 1 != commit_count.
                 */
-               if (likely(v_cmpxchg(config, &buf->commit_cold[idx].cc_sb,
+               if (likely(v_cmpxchg(config, &shmp(buf->commit_cold)[idx].cc_sb,
                                         old_commit_count, old_commit_count + 1)
                           == old_commit_count)) {
                        /*
@@ -357,7 +358,7 @@ void lib_ring_buffer_check_deliver(const struct lib_ring_buffer_config *config,
                         */
                        cmm_smp_mb();
                        /* End of exclusive subbuffer access */
-                       v_set(config, &buf->commit_cold[idx].cc_sb,
+                       v_set(config, &shmp(buf->commit_cold)[idx].cc_sb,
                              commit_count);
                        lib_ring_buffer_vmcore_check_deliver(config, buf,
                                                         commit_count, idx);
@@ -409,14 +410,15 @@ void lib_ring_buffer_write_commit_counter(const struct lib_ring_buffer_config *c
        if (unlikely(subbuf_offset(offset - commit_count, chan)))
                return;
 
-       commit_seq_old = v_read(config, &buf->commit_hot[idx].seq);
+       commit_seq_old = v_read(config, &shmp(buf->commit_hot)[idx].seq);
        while ((long) (commit_seq_old - commit_count) < 0)
-               commit_seq_old = v_cmpxchg(config, &buf->commit_hot[idx].seq,
+               commit_seq_old = v_cmpxchg(config, &shmp(buf->commit_hot)[idx].seq,
                                           commit_seq_old, commit_count);
 }
 
 extern int lib_ring_buffer_create(struct lib_ring_buffer *buf,
-                                 struct channel_backend *chanb, int cpu);
+                                 struct channel_backend *chanb, int cpu,
+                                 struct shm_header *shm_header);
 extern void lib_ring_buffer_free(struct lib_ring_buffer *buf);
 
 /* Keep track of trap nesting inside ring buffer code */
index c9f98cb79810dd9385e578da5f520c60e8582340..d1a3d200be7130076805a2c0ef019b6054bf281f 100644 (file)
@@ -16,6 +16,8 @@
  * Dual LGPL v2.1/GPL v2 license.
  */
 
+#include <string.h>
+
 #include <urcu/list.h>
 #include <urcu/uatomic.h>
 #include <urcu/ref.h>
@@ -25,6 +27,7 @@
 #include "usterr_signal_safe.h"
 #include "config.h"
 #include "backend_types.h"
+#include "shm.h"
 
 /*
  * A switch is done during tracing or as a final flush after tracing (so it
@@ -49,24 +52,25 @@ struct channel {
        //wait_queue_head_t read_wait;          /* reader wait queue */
        int finalized;                          /* Has channel been finalized */
        struct urcu_ref ref;                    /* Reference count */
-};
+       DECLARE_SHMP(struct shm_header, shm_header);
+} ____cacheline_aligned;
 
 /* Per-subbuffer commit counters used on the hot path */
 struct commit_counters_hot {
        union v_atomic cc;              /* Commit counter */
        union v_atomic seq;             /* Consecutive commits */
-};
+} ____cacheline_aligned;
 
 /* Per-subbuffer commit counters used only on cold paths */
 struct commit_counters_cold {
        union v_atomic cc_sb;           /* Incremented _once_ at sb switch */
-};
+} ____cacheline_aligned;
 
 /* ring buffer state */
 struct lib_ring_buffer {
        /* First 32 bytes cache-hot cacheline */
        union v_atomic offset;          /* Current offset in the buffer */
-       struct commit_counters_hot *commit_hot;
+       DECLARE_SHMP(struct commit_counters_hot, commit_hot);
                                        /* Commit count per sub-buffer */
        long consumed;                  /*
                                         * Current offset in the buffer
@@ -80,7 +84,7 @@ struct lib_ring_buffer {
 
        struct lib_ring_buffer_backend backend; /* Associated backend */
 
-       struct commit_counters_cold *commit_cold;
+       DECLARE_SHMP(struct commit_counters_cold, commit_cold);
                                        /* Commit count per sub-buffer */
        long active_readers;            /*
                                         * Active readers count
@@ -102,7 +106,7 @@ struct lib_ring_buffer {
        int get_subbuf:1;               /* Sub-buffer being held by reader */
        int switch_timer_enabled:1;     /* Protected by ring_buffer_nohz_lock */
        int read_timer_enabled:1;       /* Protected by ring_buffer_nohz_lock */
-};
+} ____cacheline_aligned;
 
 static inline
 void *channel_get_private(struct channel *chan)
diff --git a/libringbuffer/iterator.h b/libringbuffer/iterator.h
deleted file mode 100644 (file)
index 4914929..0000000
--- a/libringbuffer/iterator.h
+++ /dev/null
@@ -1,70 +0,0 @@
-#ifndef _LINUX_RING_BUFFER_ITERATOR_H
-#define _LINUX_RING_BUFFER_ITERATOR_H
-
-/*
- * linux/ringbuffer/iterator.h
- *
- * (C) Copyright 2010 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
- *
- * Ring buffer and channel iterators.
- *
- * Author:
- *     Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
- *
- * Dual LGPL v2.1/GPL v2 license.
- */
-
-#include "backend.h"
-#include "frontend.h"
-
-/*
- * lib_ring_buffer_get_next_record advances the buffer read position to the next
- * record. It returns either the size of the next record, -EAGAIN if there is
- * currently no data available, or -ENODATA if no data is available and buffer
- * is finalized.
- */
-extern ssize_t lib_ring_buffer_get_next_record(struct channel *chan,
-                                              struct lib_ring_buffer *buf);
-
-/*
- * channel_get_next_record advances the buffer read position to the next record.
- * It returns either the size of the next record, -EAGAIN if there is currently
- * no data available, or -ENODATA if no data is available and buffer is
- * finalized.
- * Returns the current buffer in ret_buf.
- */
-extern ssize_t channel_get_next_record(struct channel *chan,
-                                      struct lib_ring_buffer **ret_buf);
-
-/**
- * read_current_record - copy the buffer current record into dest.
- * @buf: ring buffer
- * @dest: destination where the record should be copied
- *
- * dest should be large enough to contain the record. Returns the number of
- * bytes copied.
- */
-static inline size_t read_current_record(struct lib_ring_buffer *buf, void *dest)
-{
-       return lib_ring_buffer_read(&buf->backend, buf->iter.read_offset,
-                                   dest, buf->iter.payload_len);
-}
-
-extern int lib_ring_buffer_iterator_open(struct lib_ring_buffer *buf);
-extern void lib_ring_buffer_iterator_release(struct lib_ring_buffer *buf);
-extern int channel_iterator_open(struct channel *chan);
-extern void channel_iterator_release(struct channel *chan);
-
-extern const struct file_operations channel_payload_file_operations;
-extern const struct file_operations lib_ring_buffer_payload_file_operations;
-
-/*
- * Used internally.
- */
-int channel_iterator_init(struct channel *chan);
-void channel_iterator_unregister_notifiers(struct channel *chan);
-void channel_iterator_free(struct channel *chan);
-void channel_iterator_reset(struct channel *chan);
-void lib_ring_buffer_iterator_reset(struct lib_ring_buffer *buf);
-
-#endif /* _LINUX_RING_BUFFER_ITERATOR_H */
index c105fe000763eedb0922c353860744e33af39a2f..56423276ef9d3dd3f6552886a6aee115e7554899 100644 (file)
@@ -374,8 +374,3 @@ const struct file_operations lib_ring_buffer_file_operations = {
        .compat_ioctl = lib_ring_buffer_compat_ioctl,
 #endif
 };
-EXPORT_SYMBOL_GPL(lib_ring_buffer_file_operations);
-
-MODULE_LICENSE("GPL and additional rights");
-MODULE_AUTHOR("Mathieu Desnoyers");
-MODULE_DESCRIPTION("Ring Buffer Library VFS");
index 861acf746572779eb71039bad1191b9ebb224bf8..8aa8c86fa7511a5d467815fa946f25277ceb87fc 100644 (file)
@@ -13,6 +13,7 @@
 #include "config.h"
 #include "backend.h"
 #include "frontend.h"
+#include "smp.h"
 
 /**
  * lib_ring_buffer_backend_allocate - allocate a channel buffer
@@ -26,67 +27,49 @@ static
 int lib_ring_buffer_backend_allocate(const struct lib_ring_buffer_config *config,
                                     struct lib_ring_buffer_backend *bufb,
                                     size_t size, size_t num_subbuf,
-                                    int extra_reader_sb)
+                                    int extra_reader_sb,
+                                    struct shm_header *shm_header)
 {
-       struct channel_backend *chanb = &bufb->chan->backend;
-       unsigned long j, num_pages, num_pages_per_subbuf, page_idx = 0;
+       struct channel_backend *chanb = &shmp(bufb->chan)->backend;
        unsigned long subbuf_size, mmap_offset = 0;
        unsigned long num_subbuf_alloc;
-       struct page **pages;
-       void **virt;
        unsigned long i;
 
-       num_pages = size >> get_count_order(PAGE_SIZE);
-       num_pages_per_subbuf = num_pages >> get_count_order(num_subbuf);
        subbuf_size = chanb->subbuf_size;
        num_subbuf_alloc = num_subbuf;
 
-       if (extra_reader_sb) {
-               num_pages += num_pages_per_subbuf; /* Add pages for reader */
+       if (extra_reader_sb)
                num_subbuf_alloc++;
-       }
-
-       pages = malloc_align(sizeof(*pages) * num_pages);
-       if (unlikely(!pages))
-               goto pages_error;
 
-       virt = malloc_align(sizeof(*virt) * num_pages);
-       if (unlikely(!virt))
-               goto virt_error;
-
-       bufb->array = malloc_align(sizeof(*bufb->array) * num_subbuf_alloc);
-       if (unlikely(!bufb->array))
+       set_shmp(bufb->array, zalloc_shm(shm_header,
+                       sizeof(*bufb->array) * num_subbuf_alloc));
+       if (unlikely(!shmp(bufb->array)))
                goto array_error;
 
-       for (i = 0; i < num_pages; i++) {
-               pages[i] = alloc_pages_node(cpu_to_node(max(bufb->cpu, 0)),
-                                           GFP_KERNEL | __GFP_ZERO, 0);
-               if (unlikely(!pages[i]))
-                       goto depopulate;
-               virt[i] = page_address(pages[i]);
-       }
-       bufb->num_pages_per_subbuf = num_pages_per_subbuf;
+       set_shmp(bufb->memory_map, zalloc_shm(shm_header,
+                       subbuf_size * num_subbuf_alloc));
+       if (unlikely(!shmp(bufb->memory_map)))
+               goto memory_map_error;
 
        /* Allocate backend pages array elements */
        for (i = 0; i < num_subbuf_alloc; i++) {
-               bufb->array[i] =
-                       zmalloc_align(
+               set_shmp(bufb->array[i],
+                       zalloc_shm(shm_header,
                                sizeof(struct lib_ring_buffer_backend_pages) +
-                               sizeof(struct lib_ring_buffer_backend_page)
-                               * num_pages_per_subbuf);
-               if (!bufb->array[i])
+                               subbuf_size));
+               if (!shmp(bufb->array[i]))
                        goto free_array;
        }
 
        /* Allocate write-side subbuffer table */
-       bufb->buf_wsb = zmalloc_align(
+       bufb->buf_wsb = zalloc_shm(shm_header,
                                sizeof(struct lib_ring_buffer_backend_subbuffer)
                                * num_subbuf);
-       if (unlikely(!bufb->buf_wsb))
+       if (unlikely(!shmp(bufb->buf_wsb)))
                goto free_array;
 
        for (i = 0; i < num_subbuf; i++)
-               bufb->buf_wsb[i].id = subbuffer_id(config, 0, 1, i);
+               shmp(bufb->buf_wsb)[i].id = subbuffer_id(config, 0, 1, i);
 
        /* Assign read-side subbuffer table */
        if (extra_reader_sb)
@@ -97,73 +80,50 @@ int lib_ring_buffer_backend_allocate(const struct lib_ring_buffer_config *config
 
        /* Assign pages to page index */
        for (i = 0; i < num_subbuf_alloc; i++) {
-               for (j = 0; j < num_pages_per_subbuf; j++) {
-                       CHAN_WARN_ON(chanb, page_idx > num_pages);
-                       bufb->array[i]->p[j].virt = virt[page_idx];
-                       bufb->array[i]->p[j].page = pages[page_idx];
-                       page_idx++;
-               }
+               set_shmp(shmp(bufb->array)[i]->p,
+                        &shmp(bufb->memory_map)[i * subbuf_size]);
                if (config->output == RING_BUFFER_MMAP) {
-                       bufb->array[i]->mmap_offset = mmap_offset;
+                       shmp(bufb->array)[i]->mmap_offset = mmap_offset;
                        mmap_offset += subbuf_size;
                }
        }
 
-       kfree(virt);
-       kfree(pages);
        return 0;
 
 free_array:
-       for (i = 0; (i < num_subbuf_alloc && bufb->array[i]); i++)
-               kfree(bufb->array[i]);
-depopulate:
-       /* Free all allocated pages */
-       for (i = 0; (i < num_pages && pages[i]); i++)
-               __free_page(pages[i]);
-       kfree(bufb->array);
+       /* bufb->array[i] will be freed by shm teardown */
+memory_map_error:
+       /* bufb->array will be freed by shm teardown */
 array_error:
-       kfree(virt);
-virt_error:
-       kfree(pages);
-pages_error:
        return -ENOMEM;
 }
 
 int lib_ring_buffer_backend_create(struct lib_ring_buffer_backend *bufb,
-                                  struct channel_backend *chanb, int cpu)
+                                  struct channel_backend *chanb, int cpu,
+                                  struct shm_header *shm_header)
 {
        const struct lib_ring_buffer_config *config = chanb->config;
 
-       bufb->chan = caa_container_of(chanb, struct channel, backend);
+       set_shmp(&bufb->chan, caa_container_of(chanb, struct channel, backend));
        bufb->cpu = cpu;
 
        return lib_ring_buffer_backend_allocate(config, bufb, chanb->buf_size,
                                                chanb->num_subbuf,
-                                               chanb->extra_reader_sb);
+                                               chanb->extra_reader_sb,
+                                               shm_header);
 }
 
 void lib_ring_buffer_backend_free(struct lib_ring_buffer_backend *bufb)
 {
-       struct channel_backend *chanb = &bufb->chan->backend;
-       unsigned long i, j, num_subbuf_alloc;
-
-       num_subbuf_alloc = chanb->num_subbuf;
-       if (chanb->extra_reader_sb)
-               num_subbuf_alloc++;
-
-       kfree(bufb->buf_wsb);
-       for (i = 0; i < num_subbuf_alloc; i++) {
-               for (j = 0; j < bufb->num_pages_per_subbuf; j++)
-                       __free_page(bufb->array[i]->p[j].page);
-               kfree(bufb->array[i]);
-       }
-       kfree(bufb->array);
+       /* bufb->buf_wsb will be freed by shm teardown */
+       /* bufb->array[i] will be freed by shm teardown */
+       /* bufb->array will be freed by shm teardown */
        bufb->allocated = 0;
 }
 
 void lib_ring_buffer_backend_reset(struct lib_ring_buffer_backend *bufb)
 {
-       struct channel_backend *chanb = &bufb->chan->backend;
+       struct channel_backend *chanb = &shmp(bufb->chan)->backend;
        const struct lib_ring_buffer_config *config = chanb->config;
        unsigned long num_subbuf_alloc;
        unsigned int i;
@@ -173,7 +133,7 @@ void lib_ring_buffer_backend_reset(struct lib_ring_buffer_backend *bufb)
                num_subbuf_alloc++;
 
        for (i = 0; i < chanb->num_subbuf; i++)
-               bufb->buf_wsb[i].id = subbuffer_id(config, 0, 1, i);
+               shmp(bufb->buf_wsb)[i].id = subbuffer_id(config, 0, 1, i);
        if (chanb->extra_reader_sb)
                bufb->buf_rsb.id = subbuffer_id(config, 0, 1,
                                                num_subbuf_alloc - 1);
@@ -182,9 +142,9 @@ void lib_ring_buffer_backend_reset(struct lib_ring_buffer_backend *bufb)
 
        for (i = 0; i < num_subbuf_alloc; i++) {
                /* Don't reset mmap_offset */
-               v_set(config, &bufb->array[i]->records_commit, 0);
-               v_set(config, &bufb->array[i]->records_unread, 0);
-               bufb->array[i]->data_size = 0;
+               v_set(config, &shmp(bufb->array)[i]->records_commit, 0);
+               v_set(config, &shmp(bufb->array)[i]->records_unread, 0);
+               shmp(bufb->array)[i]->data_size = 0;
                /* Don't reset backend page and virt addresses */
        }
        /* Don't reset num_pages_per_subbuf, cpu, allocated */
@@ -208,52 +168,6 @@ void channel_backend_reset(struct channel_backend *chanb)
        chanb->start_tsc = config->cb.ring_buffer_clock_read(chan);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-/**
- *     lib_ring_buffer_cpu_hp_callback - CPU hotplug callback
- *     @nb: notifier block
- *     @action: hotplug action to take
- *     @hcpu: CPU number
- *
- *     Returns the success/failure of the operation. (%NOTIFY_OK, %NOTIFY_BAD)
- */
-static
-int __cpuinit lib_ring_buffer_cpu_hp_callback(struct notifier_block *nb,
-                                             unsigned long action,
-                                             void *hcpu)
-{
-       unsigned int cpu = (unsigned long)hcpu;
-       struct channel_backend *chanb = caa_container_of(nb, struct channel_backend,
-                                                    cpu_hp_notifier);
-       const struct lib_ring_buffer_config *config = chanb->config;
-       struct lib_ring_buffer *buf;
-       int ret;
-
-       CHAN_WARN_ON(chanb, config->alloc == RING_BUFFER_ALLOC_GLOBAL);
-
-       switch (action) {
-       case CPU_UP_PREPARE:
-       case CPU_UP_PREPARE_FROZEN:
-               buf = per_cpu_ptr(chanb->buf, cpu);
-               ret = lib_ring_buffer_create(buf, chanb, cpu);
-               if (ret) {
-                       printk(KERN_ERR
-                         "ring_buffer_cpu_hp_callback: cpu %d "
-                         "buffer creation failed\n", cpu);
-                       return NOTIFY_BAD;
-               }
-               break;
-       case CPU_DEAD:
-       case CPU_DEAD_FROZEN:
-               /* No need to do a buffer switch here, because it will happen
-                * when tracing is stopped, or will be done by switch timer CPU
-                * DEAD callback. */
-               break;
-       }
-       return NOTIFY_OK;
-}
-#endif
-
 /**
  * channel_backend_init - initialize a channel backend
  * @chanb: channel backend
@@ -263,6 +177,7 @@ int __cpuinit lib_ring_buffer_cpu_hp_callback(struct notifier_block *nb,
  * @parent: dentry of parent directory, %NULL for root directory
  * @subbuf_size: size of sub-buffers (> PAGE_SIZE, power of 2)
  * @num_subbuf: number of sub-buffers (power of 2)
+ * @shm_header: shared memory header
  *
  * Returns channel pointer if successful, %NULL otherwise.
  *
@@ -275,7 +190,8 @@ int __cpuinit lib_ring_buffer_cpu_hp_callback(struct notifier_block *nb,
 int channel_backend_init(struct channel_backend *chanb,
                         const char *name,
                         const struct lib_ring_buffer_config *config,
-                        void *priv, size_t subbuf_size, size_t num_subbuf)
+                        void *priv, size_t subbuf_size, size_t num_subbuf,
+                        struct shm_header *shm_header)
 {
        struct channel *chan = caa_container_of(chanb, struct channel, backend);
        unsigned int i;
@@ -310,58 +226,42 @@ int channel_backend_init(struct channel_backend *chanb,
        chanb->extra_reader_sb =
                        (config->mode == RING_BUFFER_OVERWRITE) ? 1 : 0;
        chanb->num_subbuf = num_subbuf;
-       strlcpy(chanb->name, name, NAME_MAX);
+       strncpy(chanb->name, name, NAME_MAX);
+       chanb->name[NAME_MAX - 1] = '\0';
        chanb->config = config;
 
        if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
-               if (!zalloc_cpumask_var(&chanb->cpumask, GFP_KERNEL))
-                       return -ENOMEM;
-       }
+               struct lib_ring_buffer *buf;
+               size_t alloc_size;
 
-       if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
                /* Allocating the buffer per-cpu structures */
-               chanb->buf = alloc_percpu(struct lib_ring_buffer);
-               if (!chanb->buf)
-                       goto free_cpumask;
+               alloc_size = sizeof(struct lib_ring_buffer);
+               buf = zalloc_shm(shm_header, alloc_size * num_possible_cpus());
+               if (!buf)
+                       goto end;
+               set_shmp(chanb->buf, buf);
 
                /*
-                * In case of non-hotplug cpu, if the ring-buffer is allocated
-                * in early initcall, it will not be notified of secondary cpus.
-                * In that off case, we need to allocate for all possible cpus.
-                */
-#ifdef CONFIG_HOTPLUG_CPU
-               /*
-                * buf->backend.allocated test takes care of concurrent CPU
-                * hotplug.
-                * Priority higher than frontend, so we create the ring buffer
-                * before we start the timer.
+                * We need to allocate for all possible cpus.
                 */
-               chanb->cpu_hp_notifier.notifier_call =
-                               lib_ring_buffer_cpu_hp_callback;
-               chanb->cpu_hp_notifier.priority = 5;
-               register_hotcpu_notifier(&chanb->cpu_hp_notifier);
-
-               get_online_cpus();
-               for_each_online_cpu(i) {
-                       ret = lib_ring_buffer_create(per_cpu_ptr(chanb->buf, i),
-                                                chanb, i);
-                       if (ret)
-                               goto free_bufs; /* cpu hotplug locked */
-               }
-               put_online_cpus();
-#else
                for_each_possible_cpu(i) {
-                       ret = lib_ring_buffer_create(per_cpu_ptr(chanb->buf, i),
-                                                chanb, i);
+                       ret = lib_ring_buffer_create(&shmp(chanb->buf)[i],
+                                                    chanb, i, shm_header);
                        if (ret)
                                goto free_bufs; /* cpu hotplug locked */
                }
-#endif
        } else {
-               chanb->buf = kzalloc(sizeof(struct lib_ring_buffer), GFP_KERNEL);
-               if (!chanb->buf)
-                       goto free_cpumask;
-               ret = lib_ring_buffer_create(chanb->buf, chanb, -1);
+               struct lib_ring_buffer *buf;
+               size_t alloc_size;
+
+               alloc_size = sizeof(struct lib_ring_buffer);
+               chanb->buf = zmalloc(sizeof(struct lib_ring_buffer));
+               buf = zalloc_shm(shm_header, alloc_size);
+               if (!buf)
+                       goto end;
+               set_shmp(chanb->buf, buf);
+               ret = lib_ring_buffer_create(shmp(chanb->buf), chanb, -1,
+                                            shm_header);
                if (ret)
                        goto free_bufs;
        }
@@ -372,38 +272,18 @@ int channel_backend_init(struct channel_backend *chanb,
 free_bufs:
        if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
                for_each_possible_cpu(i) {
-                       struct lib_ring_buffer *buf = per_cpu_ptr(chanb->buf, i);
+                       struct lib_ring_buffer *buf = &shmp(chanb->buf)[i];
 
                        if (!buf->backend.allocated)
                                continue;
                        lib_ring_buffer_free(buf);
                }
-#ifdef CONFIG_HOTPLUG_CPU
-               put_online_cpus();
-#endif
-               free_percpu(chanb->buf);
-       } else
-               kfree(chanb->buf);
-free_cpumask:
-       if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
-               free_cpumask_var(chanb->cpumask);
+       }
+       /* We only free the buffer data upon shm teardown */
+end:
        return -ENOMEM;
 }
 
-/**
- * channel_backend_unregister_notifiers - unregister notifiers
- * @chan: the channel
- *
- * Holds CPU hotplug.
- */
-void channel_backend_unregister_notifiers(struct channel_backend *chanb)
-{
-       const struct lib_ring_buffer_config *config = chanb->config;
-
-       if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
-               unregister_hotcpu_notifier(&chanb->cpu_hp_notifier);
-}
-
 /**
  * channel_backend_free - destroy the channel
  * @chan: the channel
@@ -417,67 +297,21 @@ void channel_backend_free(struct channel_backend *chanb)
 
        if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
                for_each_possible_cpu(i) {
-                       struct lib_ring_buffer *buf = per_cpu_ptr(chanb->buf, i);
+                       struct lib_ring_buffer *buf = &shmp(chanb->buf)[i];
 
                        if (!buf->backend.allocated)
                                continue;
                        lib_ring_buffer_free(buf);
                }
-               free_cpumask_var(chanb->cpumask);
-               free_percpu(chanb->buf);
        } else {
-               struct lib_ring_buffer *buf = chanb->buf;
+               struct lib_ring_buffer *buf = shmp(chanb->buf);
 
                CHAN_WARN_ON(chanb, !buf->backend.allocated);
                lib_ring_buffer_free(buf);
-               kfree(buf);
        }
+       /* We only free the buffer data upon shm teardown */
 }
 
-/**
- * lib_ring_buffer_write - write data to a ring_buffer buffer.
- * @bufb : buffer backend
- * @offset : offset within the buffer
- * @src : source address
- * @len : length to write
- * @pagecpy : page size copied so far
- */
-void _lib_ring_buffer_write(struct lib_ring_buffer_backend *bufb, size_t offset,
-                           const void *src, size_t len, ssize_t pagecpy)
-{
-       struct channel_backend *chanb = &bufb->chan->backend;
-       const struct lib_ring_buffer_config *config = chanb->config;
-       size_t sbidx, index;
-       struct lib_ring_buffer_backend_pages *rpages;
-       unsigned long sb_bindex, id;
-
-       do {
-               len -= pagecpy;
-               src += pagecpy;
-               offset += pagecpy;
-               sbidx = offset >> chanb->subbuf_size_order;
-               index = (offset & (chanb->subbuf_size - 1)) >> get_count_order(PAGE_SIZE);
-
-               /*
-                * Underlying layer should never ask for writes across
-                * subbuffers.
-                */
-               CHAN_WARN_ON(chanb, offset >= chanb->buf_size);
-
-               pagecpy = min_t(size_t, len, PAGE_SIZE - (offset & ~PAGE_MASK));
-               id = bufb->buf_wsb[sbidx].id;
-               sb_bindex = subbuffer_id_get_index(config, id);
-               rpages = bufb->array[sb_bindex];
-               CHAN_WARN_ON(chanb, config->mode == RING_BUFFER_OVERWRITE
-                            && subbuffer_id_is_noref(config, id));
-               lib_ring_buffer_do_copy(config,
-                                       rpages->p[index].virt
-                                               + (offset & ~PAGE_MASK),
-                                       src, pagecpy);
-       } while (unlikely(len != pagecpy));
-}
-EXPORT_SYMBOL_GPL(_lib_ring_buffer_write);
-
 /**
  * lib_ring_buffer_read - read data from ring_buffer_buffer.
  * @bufb : buffer backend
@@ -491,42 +325,30 @@ EXPORT_SYMBOL_GPL(_lib_ring_buffer_write);
 size_t lib_ring_buffer_read(struct lib_ring_buffer_backend *bufb, size_t offset,
                            void *dest, size_t len)
 {
-       struct channel_backend *chanb = &bufb->chan->backend;
+       struct channel_backend *chanb = &shmp(bufb->chan)->backend;
        const struct lib_ring_buffer_config *config = chanb->config;
-       size_t index;
-       ssize_t pagecpy, orig_len;
+       ssize_t orig_len;
        struct lib_ring_buffer_backend_pages *rpages;
        unsigned long sb_bindex, id;
 
        orig_len = len;
        offset &= chanb->buf_size - 1;
-       index = (offset & (chanb->subbuf_size - 1)) >> get_count_order(PAGE_SIZE);
+
        if (unlikely(!len))
                return 0;
-       for (;;) {
-               pagecpy = min_t(size_t, len, PAGE_SIZE - (offset & ~PAGE_MASK));
-               id = bufb->buf_rsb.id;
-               sb_bindex = subbuffer_id_get_index(config, id);
-               rpages = bufb->array[sb_bindex];
-               CHAN_WARN_ON(chanb, config->mode == RING_BUFFER_OVERWRITE
-                            && subbuffer_id_is_noref(config, id));
-               memcpy(dest, rpages->p[index].virt + (offset & ~PAGE_MASK),
-                      pagecpy);
-               len -= pagecpy;
-               if (likely(!len))
-                       break;
-               dest += pagecpy;
-               offset += pagecpy;
-               index = (offset & (chanb->subbuf_size - 1)) >> get_count_order(PAGE_SIZE);
-               /*
-                * Underlying layer should never ask for reads across
-                * subbuffers.
-                */
-               CHAN_WARN_ON(chanb, offset >= chanb->buf_size);
-       }
+       id = bufb->buf_rsb.id;
+       sb_bindex = subbuffer_id_get_index(config, id);
+       rpages = shmp(bufb->array)[sb_bindex];
+       /*
+        * Underlying layer should never ask for reads across
+        * subbuffers.
+        */
+       CHAN_WARN_ON(chanb, offset >= chanb->buf_size);
+       CHAN_WARN_ON(chanb, config->mode == RING_BUFFER_OVERWRITE
+                    && subbuffer_id_is_noref(config, id));
+       memcpy(dest, shmp(rpages->p) + (offset & (chanb->subbuf_size - 1)), len);
        return orig_len;
 }
-EXPORT_SYMBOL_GPL(lib_ring_buffer_read);
 
 /**
  * lib_ring_buffer_read_cstr - read a C-style string from ring_buffer.
@@ -541,79 +363,33 @@ EXPORT_SYMBOL_GPL(lib_ring_buffer_read);
 int lib_ring_buffer_read_cstr(struct lib_ring_buffer_backend *bufb, size_t offset,
                              void *dest, size_t len)
 {
-       struct channel_backend *chanb = &bufb->chan->backend;
+       struct channel_backend *chanb = &shmp(bufb->chan)->backend;
        const struct lib_ring_buffer_config *config = chanb->config;
-       size_t index;
-       ssize_t pagecpy, pagelen, strpagelen, orig_offset;
+       ssize_t string_len, orig_offset;
        char *str;
        struct lib_ring_buffer_backend_pages *rpages;
        unsigned long sb_bindex, id;
 
        offset &= chanb->buf_size - 1;
-       index = (offset & (chanb->subbuf_size - 1)) >> get_count_order(PAGE_SIZE);
        orig_offset = offset;
-       for (;;) {
-               id = bufb->buf_rsb.id;
-               sb_bindex = subbuffer_id_get_index(config, id);
-               rpages = bufb->array[sb_bindex];
-               CHAN_WARN_ON(chanb, config->mode == RING_BUFFER_OVERWRITE
-                            && subbuffer_id_is_noref(config, id));
-               str = (char *)rpages->p[index].virt + (offset & ~PAGE_MASK);
-               pagelen = PAGE_SIZE - (offset & ~PAGE_MASK);
-               strpagelen = strnlen(str, pagelen);
-               if (len) {
-                       pagecpy = min_t(size_t, len, strpagelen);
-                       if (dest) {
-                               memcpy(dest, str, pagecpy);
-                               dest += pagecpy;
-                       }
-                       len -= pagecpy;
-               }
-               offset += strpagelen;
-               index = (offset & (chanb->subbuf_size - 1)) >> get_count_order(PAGE_SIZE);
-               if (strpagelen < pagelen)
-                       break;
-               /*
-                * Underlying layer should never ask for reads across
-                * subbuffers.
-                */
-               CHAN_WARN_ON(chanb, offset >= chanb->buf_size);
-       }
-       if (dest && len)
-               ((char *)dest)[0] = 0;
-       return offset - orig_offset;
-}
-EXPORT_SYMBOL_GPL(lib_ring_buffer_read_cstr);
-
-/**
- * lib_ring_buffer_read_get_page - Get a whole page to read from
- * @bufb : buffer backend
- * @offset : offset within the buffer
- * @virt : pointer to page address (output)
- *
- * Should be protected by get_subbuf/put_subbuf.
- * Returns the pointer to the page struct pointer.
- */
-struct page **lib_ring_buffer_read_get_page(struct lib_ring_buffer_backend *bufb,
-                                           size_t offset, void ***virt)
-{
-       size_t index;
-       struct lib_ring_buffer_backend_pages *rpages;
-       struct channel_backend *chanb = &bufb->chan->backend;
-       const struct lib_ring_buffer_config *config = chanb->config;
-       unsigned long sb_bindex, id;
-
-       offset &= chanb->buf_size - 1;
-       index = (offset & (chanb->subbuf_size - 1)) >> get_count_order(PAGE_SIZE);
        id = bufb->buf_rsb.id;
        sb_bindex = subbuffer_id_get_index(config, id);
-       rpages = bufb->array[sb_bindex];
+       rpages = shmp(bufb->array)[sb_bindex];
+       /*
+        * Underlying layer should never ask for reads across
+        * subbuffers.
+        */
+       CHAN_WARN_ON(chanb, offset >= chanb->buf_size);
        CHAN_WARN_ON(chanb, config->mode == RING_BUFFER_OVERWRITE
                     && subbuffer_id_is_noref(config, id));
-       *virt = &rpages->p[index].virt;
-       return &rpages->p[index].page;
+       str = (char *)shmp(rpages->p) + (offset & (chanb->subbuf_size - 1));
+       string_len = strnlen(str, len);
+       if (dest && len) {
+               memcpy(dest, str, string_len);
+               if (string_len < len)
+                       ((char *)dest)[string_len] = 0;
+       }
+       return offset - orig_offset;
 }
-EXPORT_SYMBOL_GPL(lib_ring_buffer_read_get_page);
 
 /**
  * lib_ring_buffer_read_offset_address - get address of a buffer location
@@ -628,22 +404,19 @@ EXPORT_SYMBOL_GPL(lib_ring_buffer_read_get_page);
 void *lib_ring_buffer_read_offset_address(struct lib_ring_buffer_backend *bufb,
                                          size_t offset)
 {
-       size_t index;
        struct lib_ring_buffer_backend_pages *rpages;
-       struct channel_backend *chanb = &bufb->chan->backend;
+       struct channel_backend *chanb = &shmp(bufb->chan)->backend;
        const struct lib_ring_buffer_config *config = chanb->config;
        unsigned long sb_bindex, id;
 
        offset &= chanb->buf_size - 1;
-       index = (offset & (chanb->subbuf_size - 1)) >> get_count_order(PAGE_SIZE);
        id = bufb->buf_rsb.id;
        sb_bindex = subbuffer_id_get_index(config, id);
-       rpages = bufb->array[sb_bindex];
+       rpages = shmp(bufb->array)[sb_bindex];
        CHAN_WARN_ON(chanb, config->mode == RING_BUFFER_OVERWRITE
                     && subbuffer_id_is_noref(config, id));
-       return rpages->p[index].virt + (offset & ~PAGE_MASK);
+       return shmp(rpages->p) + (offset & (chanb->subbuf_size - 1));
 }
-EXPORT_SYMBOL_GPL(lib_ring_buffer_read_offset_address);
 
 /**
  * lib_ring_buffer_offset_address - get address of a location within the buffer
@@ -658,20 +431,18 @@ EXPORT_SYMBOL_GPL(lib_ring_buffer_read_offset_address);
 void *lib_ring_buffer_offset_address(struct lib_ring_buffer_backend *bufb,
                                     size_t offset)
 {
-       size_t sbidx, index;
+       size_t sbidx;
        struct lib_ring_buffer_backend_pages *rpages;
-       struct channel_backend *chanb = &bufb->chan->backend;
+       struct channel_backend *chanb = &shmp(bufb->chan)->backend;
        const struct lib_ring_buffer_config *config = chanb->config;
        unsigned long sb_bindex, id;
 
        offset &= chanb->buf_size - 1;
        sbidx = offset >> chanb->subbuf_size_order;
-       index = (offset & (chanb->subbuf_size - 1)) >> get_count_order(PAGE_SIZE);
-       id = bufb->buf_wsb[sbidx].id;
+       id = shmp(bufb->buf_wsb)[sbidx].id;
        sb_bindex = subbuffer_id_get_index(config, id);
-       rpages = bufb->array[sb_bindex];
+       rpages = shmp(bufb->array)[sb_bindex];
        CHAN_WARN_ON(chanb, config->mode == RING_BUFFER_OVERWRITE
                     && subbuffer_id_is_noref(config, id));
-       return rpages->p[index].virt + (offset & ~PAGE_MASK);
+       return shmp(rpages->p) + (offset & (chanb->subbuf_size - 1));
 }
-EXPORT_SYMBOL_GPL(lib_ring_buffer_offset_address);
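
All accessors above now reach buffer data through shmp()/set_shmp() rather than raw
pointers. Those helpers come from the new libringbuffer/shm.h, which is not shown in
this excerpt; the sketch below only illustrates the underlying idea, with hypothetical
names: references stored inside the segment are kept as byte offsets from the segment
base, so each process can resolve them against whatever address its own shmat()
returned. With offsets, an expression such as &shmp(chanb->buf)[i] finds the i-th
per-cpu lib_ring_buffer no matter where the segment happens to be mapped.

    #include <stddef.h>

    /* Hypothetical illustration only -- not the shm.h API itself. */
    struct shm_ref {
            size_t offset;          /* byte offset from the segment base */
    };

    /* Per-process base address of the mapped segment, as returned by shmat(). */
    static char *shm_base;

    static inline void *shm_deref(struct shm_ref ref)
    {
            return shm_base + ref.offset;
    }

    static inline void shm_set(struct shm_ref *ref, void *addr)
    {
            ref->offset = (char *) addr - shm_base;
    }
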
index 5ceda871a907912d517c506f103b8686a0b4fea5..2b4ecc24b1c44769afe455f40773b6c6db5233c8 100644 (file)
  * Dual LGPL v2.1/GPL v2 license.
  */
 
+#include <sys/types.h>
+#include <sys/shm.h>
 #include <urcu/compiler.h>
+#include <urcu/ref.h>
 
+#include "smp.h"
 #include "config.h"
 #include "backend.h"
 #include "frontend.h"
-#include "iterator.h"
-#include "nohz.h"
+#include "shm.h"
 
 /*
  * Internal structure representing offsets to use at a sub-buffer switch.
@@ -56,20 +59,7 @@ struct switch_offsets {
                     switch_old_end:1;
 };
 
-#ifdef CONFIG_NO_HZ
-enum tick_nohz_val {
-       TICK_NOHZ_STOP,
-       TICK_NOHZ_FLUSH,
-       TICK_NOHZ_RESTART,
-};
-
-static ATOMIC_NOTIFIER_HEAD(tick_nohz_notifier);
-#endif /* CONFIG_NO_HZ */
-
-static DEFINE_PER_CPU(spinlock_t, ring_buffer_nohz_lock);
-
-DEFINE_PER_CPU(unsigned int, lib_ring_buffer_nesting);
-EXPORT_PER_CPU_SYMBOL(lib_ring_buffer_nesting);
+__thread unsigned int lib_ring_buffer_nesting;
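
The per-CPU nesting counter, which the kernel protected by disabling preemption,
becomes a plain __thread (TLS) counter: each userspace thread tracks its own tracing
re-entrancy, for instance when a signal handler interrupts a tracepoint. A minimal
usage sketch, with hypothetical helper names and an arbitrary recursion limit picked
purely for illustration:

    extern __thread unsigned int lib_ring_buffer_nesting;

    /* Returns 0 if the event may be recorded, -1 if nesting is too deep. */
    static inline int nesting_inc_sketch(void)
    {
            if (++lib_ring_buffer_nesting > 4) {    /* limit is arbitrary here */
                    --lib_ring_buffer_nesting;
                    return -1;                      /* drop the event */
            }
            return 0;
    }

    static inline void nesting_dec_sketch(void)
    {
            --lib_ring_buffer_nesting;
    }
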
 
 static
 void lib_ring_buffer_print_errors(struct channel *chan,
@@ -80,11 +70,11 @@ void lib_ring_buffer_print_errors(struct channel *chan,
  */
 void lib_ring_buffer_free(struct lib_ring_buffer *buf)
 {
-       struct channel *chan = buf->backend.chan;
+       struct channel *chan = shmp(buf->backend.chan);
 
        lib_ring_buffer_print_errors(chan, buf, buf->backend.cpu);
-       kfree(buf->commit_hot);
-       kfree(buf->commit_cold);
+       /* We only free commit_hot and commit_cold upon shm teardown */
 
        lib_ring_buffer_backend_free(&buf->backend);
 }
@@ -100,7 +90,7 @@ void lib_ring_buffer_free(struct lib_ring_buffer *buf)
  */
 void lib_ring_buffer_reset(struct lib_ring_buffer *buf)
 {
-       struct channel *chan = buf->backend.chan;
+       struct channel *chan = shmp(buf->backend.chan);
        const struct lib_ring_buffer_config *config = chan->backend.config;
        unsigned int i;
 
@@ -108,15 +98,14 @@ void lib_ring_buffer_reset(struct lib_ring_buffer *buf)
         * Reset iterator first. It will put the subbuffer if it currently holds
         * it.
         */
-       lib_ring_buffer_iterator_reset(buf);
        v_set(config, &buf->offset, 0);
        for (i = 0; i < chan->backend.num_subbuf; i++) {
-               v_set(config, &buf->commit_hot[i].cc, 0);
-               v_set(config, &buf->commit_hot[i].seq, 0);
-               v_set(config, &buf->commit_cold[i].cc_sb, 0);
+               v_set(config, &shmp(buf->commit_hot)[i].cc, 0);
+               v_set(config, &shmp(buf->commit_hot)[i].seq, 0);
+               v_set(config, &shmp(buf->commit_cold)[i].cc_sb, 0);
        }
-       atomic_long_set(&buf->consumed, 0);
-       atomic_set(&buf->record_disabled, 0);
+       uatomic_set(&buf->consumed, 0);
+       uatomic_set(&buf->record_disabled, 0);
        v_set(config, &buf->last_tsc, 0);
        lib_ring_buffer_backend_reset(&buf->backend);
        /* Don't reset number of active readers */
@@ -127,7 +116,6 @@ void lib_ring_buffer_reset(struct lib_ring_buffer *buf)
        v_set(config, &buf->records_overrun, 0);
        buf->finalized = 0;
 }
-EXPORT_SYMBOL_GPL(lib_ring_buffer_reset);
 
 /**
  * channel_reset - Reset channel to initial values.
@@ -143,21 +131,20 @@ void channel_reset(struct channel *chan)
        /*
         * Reset iterators first. Will put the subbuffer if held for reading.
         */
-       channel_iterator_reset(chan);
-       atomic_set(&chan->record_disabled, 0);
+       uatomic_set(&chan->record_disabled, 0);
        /* Don't reset commit_count_mask, still valid */
        channel_backend_reset(&chan->backend);
        /* Don't reset switch/read timer interval */
        /* Don't reset notifiers and notifier enable bits */
        /* Don't reset reader reference count */
 }
-EXPORT_SYMBOL_GPL(channel_reset);
 
 /*
  * Must be called under cpu hotplug protection.
  */
 int lib_ring_buffer_create(struct lib_ring_buffer *buf,
-                          struct channel_backend *chanb, int cpu)
+                          struct channel_backend *chanb, int cpu,
+                          struct shm_header *shm_header)
 {
        const struct lib_ring_buffer_config *config = chanb->config;
        struct channel *chan = caa_container_of(chanb, struct channel, backend);
@@ -171,39 +158,29 @@ int lib_ring_buffer_create(struct lib_ring_buffer *buf,
        if (buf->backend.allocated)
                return 0;
 
-       /*
-        * Paranoia: per cpu dynamic allocation is not officially documented as
-        * zeroing the memory, so let's do it here too, just in case.
-        */
-       memset(buf, 0, sizeof(*buf));
-
-       ret = lib_ring_buffer_backend_create(&buf->backend, &chan->backend, cpu);
+       ret = lib_ring_buffer_backend_create(&buf->backend, &chan->backend,
+                       cpu, shm_header);
        if (ret)
                return ret;
 
-       buf->commit_hot =
-               kzalloc_node(ALIGN(sizeof(*buf->commit_hot)
-                                  * chan->backend.num_subbuf,
-                                  1 << INTERNODE_CACHE_SHIFT),
-                       GFP_KERNEL, cpu_to_node(max(cpu, 0)));
-       if (!buf->commit_hot) {
+       set_shmp(&buf->commit_hot,
+                zalloc_shm(shm_header,
+                       sizeof(*buf->commit_hot) * chan->backend.num_subbuf));
+       if (!shmp(buf->commit_hot)) {
                ret = -ENOMEM;
                goto free_chanbuf;
        }
 
-       buf->commit_cold =
-               kzalloc_node(ALIGN(sizeof(*buf->commit_cold)
-                                  * chan->backend.num_subbuf,
-                                  1 << INTERNODE_CACHE_SHIFT),
-                       GFP_KERNEL, cpu_to_node(max(cpu, 0)));
-       if (!buf->commit_cold) {
+       set_shmp(&buf->commit_cold,
+                zalloc_shm(shm_header,
+                       sizeof(*buf->commit_cold) * chan->backend.num_subbuf));
+       if (!shmp(buf->commit_cold)) {
                ret = -ENOMEM;
                goto free_commit;
        }
 
        num_subbuf = chan->backend.num_subbuf;
-       init_waitqueue_head(&buf->read_wait);
-       raw_spin_lock_init(&buf->raw_tick_nohz_spinlock);
+       //init_waitqueue_head(&buf->read_wait);
 
        /*
         * Write the subbuffer header for first subbuffer so we know the total
@@ -211,38 +188,24 @@ int lib_ring_buffer_create(struct lib_ring_buffer *buf,
         */
        subbuf_header_size = config->cb.subbuffer_header_size();
        v_set(config, &buf->offset, subbuf_header_size);
-       subbuffer_id_clear_noref(config, &buf->backend.buf_wsb[0].id);
-       tsc = config->cb.ring_buffer_clock_read(buf->backend.chan);
+       subbuffer_id_clear_noref(config, &shmp(buf->backend.buf_wsb)[0].id);
+       tsc = config->cb.ring_buffer_clock_read(shmp(buf->backend.chan));
        config->cb.buffer_begin(buf, tsc, 0);
-       v_add(config, subbuf_header_size, &buf->commit_hot[0].cc);
+       v_add(config, subbuf_header_size, &shmp(buf->commit_hot)[0].cc);
 
        if (config->cb.buffer_create) {
                ret = config->cb.buffer_create(buf, priv, cpu, chanb->name);
                if (ret)
                        goto free_init;
        }
-
-       /*
-        * Ensure the buffer is ready before setting it to allocated and setting
-        * the cpumask.
-        * Used for cpu hotplug vs cpumask iteration.
-        */
-       smp_wmb();
        buf->backend.allocated = 1;
-
-       if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
-               CHAN_WARN_ON(chan, cpumask_test_cpu(cpu,
-                            chan->backend.cpumask));
-               cpumask_set_cpu(cpu, chan->backend.cpumask);
-       }
-
        return 0;
 
        /* Error handling */
 free_init:
-       kfree(buf->commit_cold);
+       /* commit_cold will be freed by shm teardown */
 free_commit:
-       kfree(buf->commit_hot);
+       /* commit_hot will be freed by shm teardown */
 free_chanbuf:
        lib_ring_buffer_backend_free(&buf->backend);
        return ret;
@@ -251,55 +214,52 @@ free_chanbuf:
 static void switch_buffer_timer(unsigned long data)
 {
        struct lib_ring_buffer *buf = (struct lib_ring_buffer *)data;
-       struct channel *chan = buf->backend.chan;
+       struct channel *chan = shmp(buf->backend.chan);
        const struct lib_ring_buffer_config *config = chan->backend.config;
 
        /*
         * Only flush buffers periodically if readers are active.
         */
-       if (atomic_long_read(&buf->active_readers))
+       if (uatomic_read(&buf->active_readers))
                lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE);
 
-       if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
-               mod_timer_pinned(&buf->switch_timer,
-                                jiffies + chan->switch_timer_interval);
-       else
-               mod_timer(&buf->switch_timer,
-                         jiffies + chan->switch_timer_interval);
+       //TODO timers
+       //if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
+       //      mod_timer_pinned(&buf->switch_timer,
+       //                       jiffies + chan->switch_timer_interval);
+       //else
+       //      mod_timer(&buf->switch_timer,
+       //                jiffies + chan->switch_timer_interval);
 }
 
-/*
- * Called with ring_buffer_nohz_lock held for per-cpu buffers.
- */
 static void lib_ring_buffer_start_switch_timer(struct lib_ring_buffer *buf)
 {
-       struct channel *chan = buf->backend.chan;
+       struct channel *chan = shmp(buf->backend.chan);
        const struct lib_ring_buffer_config *config = chan->backend.config;
 
        if (!chan->switch_timer_interval || buf->switch_timer_enabled)
                return;
-       init_timer(&buf->switch_timer);
-       buf->switch_timer.function = switch_buffer_timer;
-       buf->switch_timer.expires = jiffies + chan->switch_timer_interval;
-       buf->switch_timer.data = (unsigned long)buf;
-       if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
-               add_timer_on(&buf->switch_timer, buf->backend.cpu);
-       else
-               add_timer(&buf->switch_timer);
+       //TODO
+       //init_timer(&buf->switch_timer);
+       //buf->switch_timer.function = switch_buffer_timer;
+       //buf->switch_timer.expires = jiffies + chan->switch_timer_interval;
+       //buf->switch_timer.data = (unsigned long)buf;
+       //if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
+       //      add_timer_on(&buf->switch_timer, buf->backend.cpu);
+       //else
+       //      add_timer(&buf->switch_timer);
        buf->switch_timer_enabled = 1;
 }
 
-/*
- * Called with ring_buffer_nohz_lock held for per-cpu buffers.
- */
 static void lib_ring_buffer_stop_switch_timer(struct lib_ring_buffer *buf)
 {
-       struct channel *chan = buf->backend.chan;
+       struct channel *chan = shmp(buf->backend.chan);
 
        if (!chan->switch_timer_interval || !buf->switch_timer_enabled)
                return;
 
-       del_timer_sync(&buf->switch_timer);
+       //TODO
+       //del_timer_sync(&buf->switch_timer);
        buf->switch_timer_enabled = 0;
 }
 
@@ -309,31 +269,30 @@ static void lib_ring_buffer_stop_switch_timer(struct lib_ring_buffer *buf)
 static void read_buffer_timer(unsigned long data)
 {
        struct lib_ring_buffer *buf = (struct lib_ring_buffer *)data;
-       struct channel *chan = buf->backend.chan;
+       struct channel *chan = shmp(buf->backend.chan);
        const struct lib_ring_buffer_config *config = chan->backend.config;
 
        CHAN_WARN_ON(chan, !buf->backend.allocated);
 
-       if (atomic_long_read(&buf->active_readers)
+       if (uatomic_read(&buf->active_readers)
            && lib_ring_buffer_poll_deliver(config, buf, chan)) {
-               wake_up_interruptible(&buf->read_wait);
-               wake_up_interruptible(&chan->read_wait);
+               //TODO
+               //wake_up_interruptible(&buf->read_wait);
+               //wake_up_interruptible(&chan->read_wait);
        }
 
-       if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
-               mod_timer_pinned(&buf->read_timer,
-                                jiffies + chan->read_timer_interval);
-       else
-               mod_timer(&buf->read_timer,
-                         jiffies + chan->read_timer_interval);
+       //TODO
+       //if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
+       //      mod_timer_pinned(&buf->read_timer,
+       //                       jiffies + chan->read_timer_interval);
+       //else
+       //      mod_timer(&buf->read_timer,
+       //                jiffies + chan->read_timer_interval);
 }
 
-/*
- * Called with ring_buffer_nohz_lock held for per-cpu buffers.
- */
 static void lib_ring_buffer_start_read_timer(struct lib_ring_buffer *buf)
 {
-       struct channel *chan = buf->backend.chan;
+       struct channel *chan = shmp(buf->backend.chan);
        const struct lib_ring_buffer_config *config = chan->backend.config;
 
        if (config->wakeup != RING_BUFFER_WAKEUP_BY_TIMER
@@ -341,24 +300,22 @@ static void lib_ring_buffer_start_read_timer(struct lib_ring_buffer *buf)
            || buf->read_timer_enabled)
                return;
 
-       init_timer(&buf->read_timer);
-       buf->read_timer.function = read_buffer_timer;
-       buf->read_timer.expires = jiffies + chan->read_timer_interval;
-       buf->read_timer.data = (unsigned long)buf;
+       //TODO
+       //init_timer(&buf->read_timer);
+       //buf->read_timer.function = read_buffer_timer;
+       //buf->read_timer.expires = jiffies + chan->read_timer_interval;
+       //buf->read_timer.data = (unsigned long)buf;
 
-       if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
-               add_timer_on(&buf->read_timer, buf->backend.cpu);
-       else
-               add_timer(&buf->read_timer);
+       //if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
+       //      add_timer_on(&buf->read_timer, buf->backend.cpu);
+       //else
+       //      add_timer(&buf->read_timer);
        buf->read_timer_enabled = 1;
 }
 
-/*
- * Called with ring_buffer_nohz_lock held for per-cpu buffers.
- */
 static void lib_ring_buffer_stop_read_timer(struct lib_ring_buffer *buf)
 {
-       struct channel *chan = buf->backend.chan;
+       struct channel *chan = shmp(buf->backend.chan);
        const struct lib_ring_buffer_config *config = chan->backend.config;
 
        if (config->wakeup != RING_BUFFER_WAKEUP_BY_TIMER
@@ -366,202 +323,34 @@ static void lib_ring_buffer_stop_read_timer(struct lib_ring_buffer *buf)
            || !buf->read_timer_enabled)
                return;
 
-       del_timer_sync(&buf->read_timer);
+       //TODO
+       //del_timer_sync(&buf->read_timer);
        /*
         * do one more check to catch data that has been written in the last
         * timer period.
         */
        if (lib_ring_buffer_poll_deliver(config, buf, chan)) {
-               wake_up_interruptible(&buf->read_wait);
-               wake_up_interruptible(&chan->read_wait);
+               //TODO
+               //wake_up_interruptible(&buf->read_wait);
+               //wake_up_interruptible(&chan->read_wait);
        }
        buf->read_timer_enabled = 0;
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-/**
- *     lib_ring_buffer_cpu_hp_callback - CPU hotplug callback
- *     @nb: notifier block
- *     @action: hotplug action to take
- *     @hcpu: CPU number
- *
- *     Returns the success/failure of the operation. (%NOTIFY_OK, %NOTIFY_BAD)
- */
-static
-int __cpuinit lib_ring_buffer_cpu_hp_callback(struct notifier_block *nb,
-                                             unsigned long action,
-                                             void *hcpu)
-{
-       unsigned int cpu = (unsigned long)hcpu;
-       struct channel *chan = caa_container_of(nb, struct channel,
-                                           cpu_hp_notifier);
-       struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf, cpu);
-       const struct lib_ring_buffer_config *config = chan->backend.config;
-
-       if (!chan->cpu_hp_enable)
-               return NOTIFY_DONE;
-
-       CHAN_WARN_ON(chan, config->alloc == RING_BUFFER_ALLOC_GLOBAL);
-
-       switch (action) {
-       case CPU_DOWN_FAILED:
-       case CPU_DOWN_FAILED_FROZEN:
-       case CPU_ONLINE:
-       case CPU_ONLINE_FROZEN:
-               wake_up_interruptible(&chan->hp_wait);
-               lib_ring_buffer_start_switch_timer(buf);
-               lib_ring_buffer_start_read_timer(buf);
-               return NOTIFY_OK;
-
-       case CPU_DOWN_PREPARE:
-       case CPU_DOWN_PREPARE_FROZEN:
-               lib_ring_buffer_stop_switch_timer(buf);
-               lib_ring_buffer_stop_read_timer(buf);
-               return NOTIFY_OK;
-
-       case CPU_DEAD:
-       case CPU_DEAD_FROZEN:
-               /*
-                * Performing a buffer switch on a remote CPU. Performed by
-                * the CPU responsible for doing the hotunplug after the target
-                * CPU stopped running completely. Ensures that all data
-                * from that remote CPU is flushed.
-                */
-               lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE);
-               return NOTIFY_OK;
-
-       default:
-               return NOTIFY_DONE;
-       }
-}
-#endif
-
-#if defined(CONFIG_NO_HZ) && defined(CONFIG_LIB_RING_BUFFER)
-/*
- * For per-cpu buffers, call the reader wakeups before switching the buffer, so
- * that wake-up-tracing generated events are flushed before going idle (in
- * tick_nohz). We test if the spinlock is locked to deal with the race where
- * readers try to sample the ring buffer before we perform the switch. We let
- * the readers retry in that case. If there is data in the buffer, the wake up
- * is going to forbid the CPU running the reader thread from going idle.
- */
-static int notrace ring_buffer_tick_nohz_callback(struct notifier_block *nb,
-                                                 unsigned long val,
-                                                 void *data)
-{
-       struct channel *chan = caa_container_of(nb, struct channel,
-                                           tick_nohz_notifier);
-       const struct lib_ring_buffer_config *config = chan->backend.config;
-       struct lib_ring_buffer *buf;
-       int cpu = smp_processor_id();
-
-       if (config->alloc != RING_BUFFER_ALLOC_PER_CPU) {
-               /*
-                * We don't support keeping the system idle with global buffers
-                * and streaming active. In order to do so, we would need to
-                * sample a non-nohz-cpumask racelessly with the nohz updates
-                * without adding synchronization overhead to nohz. Leave this
-                * use-case out for now.
-                */
-               return 0;
-       }
-
-       buf = channel_get_ring_buffer(config, chan, cpu);
-       switch (val) {
-       case TICK_NOHZ_FLUSH:
-               raw_spin_lock(&buf->raw_tick_nohz_spinlock);
-               if (config->wakeup == RING_BUFFER_WAKEUP_BY_TIMER
-                   && chan->read_timer_interval
-                   && atomic_long_read(&buf->active_readers)
-                   && (lib_ring_buffer_poll_deliver(config, buf, chan)
-                       || lib_ring_buffer_pending_data(config, buf, chan))) {
-                       wake_up_interruptible(&buf->read_wait);
-                       wake_up_interruptible(&chan->read_wait);
-               }
-               if (chan->switch_timer_interval)
-                       lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE);
-               raw_spin_unlock(&buf->raw_tick_nohz_spinlock);
-               break;
-       case TICK_NOHZ_STOP:
-               spin_lock(&__get_cpu_var(ring_buffer_nohz_lock));
-               lib_ring_buffer_stop_switch_timer(buf);
-               lib_ring_buffer_stop_read_timer(buf);
-               spin_unlock(&__get_cpu_var(ring_buffer_nohz_lock));
-               break;
-       case TICK_NOHZ_RESTART:
-               spin_lock(&__get_cpu_var(ring_buffer_nohz_lock));
-               lib_ring_buffer_start_read_timer(buf);
-               lib_ring_buffer_start_switch_timer(buf);
-               spin_unlock(&__get_cpu_var(ring_buffer_nohz_lock));
-               break;
-       }
-
-       return 0;
-}
-
-void notrace lib_ring_buffer_tick_nohz_flush(void)
-{
-       atomic_notifier_call_chain(&tick_nohz_notifier, TICK_NOHZ_FLUSH,
-                                  NULL);
-}
-
-void notrace lib_ring_buffer_tick_nohz_stop(void)
-{
-       atomic_notifier_call_chain(&tick_nohz_notifier, TICK_NOHZ_STOP,
-                                  NULL);
-}
-
-void notrace lib_ring_buffer_tick_nohz_restart(void)
-{
-       atomic_notifier_call_chain(&tick_nohz_notifier, TICK_NOHZ_RESTART,
-                                  NULL);
-}
-#endif /* defined(CONFIG_NO_HZ) && defined(CONFIG_LIB_RING_BUFFER) */
-
-/*
- * Holds CPU hotplug.
- */
 static void channel_unregister_notifiers(struct channel *chan)
 {
        const struct lib_ring_buffer_config *config = chan->backend.config;
        int cpu;
 
-       channel_iterator_unregister_notifiers(chan);
        if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
-#ifdef CONFIG_NO_HZ
-               /*
-                * Remove the nohz notifier first, so we are certain we stop
-                * the timers.
-                */
-               atomic_notifier_chain_unregister(&tick_nohz_notifier,
-                                                &chan->tick_nohz_notifier);
-               /*
-                * ring_buffer_nohz_lock will not be needed below, because
-                * we just removed the notifiers, which were the only source of
-                * concurrency.
-                */
-#endif /* CONFIG_NO_HZ */
-#ifdef CONFIG_HOTPLUG_CPU
-               get_online_cpus();
-               chan->cpu_hp_enable = 0;
-               for_each_online_cpu(cpu) {
-                       struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf,
-                                                             cpu);
-                       lib_ring_buffer_stop_switch_timer(buf);
-                       lib_ring_buffer_stop_read_timer(buf);
-               }
-               put_online_cpus();
-               unregister_cpu_notifier(&chan->cpu_hp_notifier);
-#else
                for_each_possible_cpu(cpu) {
-                       struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf,
-                                                             cpu);
+                       struct lib_ring_buffer *buf = &shmp(chan->backend.buf)[cpu];
+
                        lib_ring_buffer_stop_switch_timer(buf);
                        lib_ring_buffer_stop_read_timer(buf);
                }
-#endif
        } else {
-               struct lib_ring_buffer *buf = chan->backend.buf;
+               struct lib_ring_buffer *buf = shmp(chan->backend.buf);
 
                lib_ring_buffer_stop_switch_timer(buf);
                lib_ring_buffer_stop_read_timer(buf);
@@ -571,9 +360,8 @@ static void channel_unregister_notifiers(struct channel *chan)
 
 static void channel_free(struct channel *chan)
 {
-       channel_iterator_free(chan);
        channel_backend_free(&chan->backend);
-       kfree(chan);
+       /* chan is only freed upon shm teardown */
 }
 
 /**
@@ -590,6 +378,7 @@ static void channel_free(struct channel *chan)
  *                         padding to let readers get those sub-buffers.
  *                         Used for live streaming.
  * @read_timer_interval: Time interval (in us) to wake up pending readers.
+ * @shmid: shared memory ID (output)
  *
  * Holds cpu hotplug.
  * Returns NULL on failure.
@@ -598,79 +387,104 @@ struct channel *channel_create(const struct lib_ring_buffer_config *config,
                   const char *name, void *priv, void *buf_addr,
                   size_t subbuf_size,
                   size_t num_subbuf, unsigned int switch_timer_interval,
-                  unsigned int read_timer_interval)
+                  unsigned int read_timer_interval,
+                  int *shmid)
 {
        int ret, cpu;
        struct channel *chan;
+       size_t shmsize, bufshmsize;
+       struct shm_header *shm_header;
+       unsigned long num_subbuf_alloc;
 
        if (lib_ring_buffer_check_config(config, switch_timer_interval,
                                         read_timer_interval))
                return NULL;
 
-       chan = kzalloc(sizeof(struct channel), GFP_KERNEL);
-       if (!chan)
+       /* Calculate the shm allocation layout */
+       shmsize = sizeof(struct shm_header);
+       shmsize += sizeof(struct channel);
+
+       /* Per-cpu buffer size: control (prior to backend) */
+       bufshmsize = sizeof(struct lib_ring_buffer);
+       shmsize += bufshmsize * num_possible_cpus();
+
+       /* Per-cpu buffer size: backend */
+       /* num_subbuf + 1 is the worst case (one extra sub-buffer owned by the reader) */
+       num_subbuf_alloc = num_subbuf + 1;
+       bufshmsize = sizeof(struct lib_ring_buffer_backend_pages *) * num_subbuf_alloc;
+       bufshmsize += subbuf_size * (num_subbuf_alloc);
+       bufshmsize += (sizeof(struct lib_ring_buffer_backend_pages) + subbuf_size) * num_subbuf_alloc;
+       bufshmsize += sizeof(struct lib_ring_buffer_backend_subbuffer) * num_subbuf;
+       shmsize += bufshmsize * num_possible_cpus();
+
+       /* Per-cpu buffer size: control (after backend) */
+       bufshmsize = sizeof(struct commit_counters_hot) * num_subbuf;
+       bufshmsize += sizeof(struct commit_counters_cold) * num_subbuf;
+       shmsize += bufshmsize * num_possible_cpus();
+
+       /* Allocate shm */
+       *shmid = shmget(getpid(), shmsize, IPC_CREAT | IPC_EXCL | 0700);
+       if (*shmid < 0) {
+               if (errno == EINVAL)
+                       ERR("shmget() returned EINVAL; maybe /proc/sys/kernel/shmmax should be increased.");
+               else
+                       PERROR("shmget");
                return NULL;
+       }
 
-       ret = channel_backend_init(&chan->backend, name, config, priv,
-                                  subbuf_size, num_subbuf);
-       if (ret)
-               goto error;
+       shm_header = shmat(*shmid, NULL, 0);
+       if (shm_header == (void *) -1) {
+               perror("shmat");
+               goto destroy_shmem;
+       }
 
-       ret = channel_iterator_init(chan);
+       /*
+        * Mark the shared memory for destruction right away: it is only
+        * actually destroyed once all users have detached from it.
+        */
+       ret = shmctl(*shmid, IPC_RMID, NULL);
+       if (ret == -1) {
+               perror("shmctl");
+               goto destroy_shmem;
+       }
+
+       shm_header->magic = SHM_MAGIC;
+       shm_header->major = SHM_MAJOR;
+       shm_header->minor = SHM_MINOR;
+       shm_header->bits_per_long = CAA_BITS_PER_LONG;
+       shm_header->shm_size = shmsize;
+       shm_header->shm_allocated = sizeof(struct shm_header);
+
+       chan = zalloc_shm(shm_header, sizeof(struct channel));
+       if (!chan)
+               goto destroy_shmem;
+       set_shmp(shm_header->chan, chan);
+
+       ret = channel_backend_init(&chan->backend, name, config, priv,
+                                  subbuf_size, num_subbuf, shm_header);
        if (ret)
-               goto error_free_backend;
+               goto destroy_shmem;
 
        chan->commit_count_mask = (~0UL >> chan->backend.num_subbuf_order);
-       chan->switch_timer_interval = usecs_to_jiffies(switch_timer_interval);
-       chan->read_timer_interval = usecs_to_jiffies(read_timer_interval);
-       kref_init(&chan->ref);
-       init_waitqueue_head(&chan->read_wait);
-       init_waitqueue_head(&chan->hp_wait);
+       //TODO
+       //chan->switch_timer_interval = usecs_to_jiffies(switch_timer_interval);
+       //chan->read_timer_interval = usecs_to_jiffies(read_timer_interval);
+       urcu_ref_init(&chan->ref);
+       //TODO
+       //init_waitqueue_head(&chan->read_wait);
+       //init_waitqueue_head(&chan->hp_wait);
 
        if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
-#if defined(CONFIG_NO_HZ) && defined(CONFIG_LIB_RING_BUFFER)
-               /* Only benefit from NO_HZ idle with per-cpu buffers for now. */
-               chan->tick_nohz_notifier.notifier_call =
-                       ring_buffer_tick_nohz_callback;
-               chan->tick_nohz_notifier.priority = ~0U;
-               atomic_notifier_chain_register(&tick_nohz_notifier,
-                                      &chan->tick_nohz_notifier);
-#endif /* defined(CONFIG_NO_HZ) && defined(CONFIG_LIB_RING_BUFFER) */
-
                /*
                 * In case of non-hotplug cpu, if the ring-buffer is allocated
                 * in early initcall, it will not be notified of secondary cpus.
                 * In that off case, we need to allocate for all possible cpus.
                 */
-#ifdef CONFIG_HOTPLUG_CPU
-               chan->cpu_hp_notifier.notifier_call =
-                               lib_ring_buffer_cpu_hp_callback;
-               chan->cpu_hp_notifier.priority = 6;
-               register_cpu_notifier(&chan->cpu_hp_notifier);
-
-               get_online_cpus();
-               for_each_online_cpu(cpu) {
-                       struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf,
-                                                              cpu);
-                       spin_lock(&per_cpu(ring_buffer_nohz_lock, cpu));
-                       lib_ring_buffer_start_switch_timer(buf);
-                       lib_ring_buffer_start_read_timer(buf);
-                       spin_unlock(&per_cpu(ring_buffer_nohz_lock, cpu));
-               }
-               chan->cpu_hp_enable = 1;
-               put_online_cpus();
-#else
                for_each_possible_cpu(cpu) {
-                       struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf,
-                                                             cpu);
-                       spin_lock(&per_cpu(ring_buffer_nohz_lock, cpu));
+                       struct lib_ring_buffer *buf = &shmp(chan->backend.buf)[cpu];
                        lib_ring_buffer_start_switch_timer(buf);
                        lib_ring_buffer_start_read_timer(buf);
-                       spin_unlock(&per_cpu(ring_buffer_nohz_lock, cpu));
                }
-#endif
        } else {
-               struct lib_ring_buffer *buf = chan->backend.buf;
+               struct lib_ring_buffer *buf = shmp(chan->backend.buf);
 
                lib_ring_buffer_start_switch_timer(buf);
                lib_ring_buffer_start_read_timer(buf);
@@ -678,18 +492,18 @@ struct channel *channel_create(const struct lib_ring_buffer_config *config,
 
        return chan;
 
-error_free_backend:
-       channel_backend_free(&chan->backend);
-error:
-       kfree(chan);
+destroy_shmem:
+       ret = shmctl(*shmid, IPC_RMID, NULL);
+       if (ret == -1) {
+               perror("shmctl");
+       }
        return NULL;
 }
-EXPORT_SYMBOL_GPL(channel_create);
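
channel_create() now carves the whole channel out of a single System V shared memory
segment: shmget() creates it, shmat() maps it, and shmctl(IPC_RMID) immediately marks
it for destruction so the kernel reclaims it as soon as the last attached process
detaches. Below is a stand-alone sketch of that create/attach/auto-destroy pattern;
the key, size and error handling are arbitrary.

    #include <stdio.h>
    #include <sys/ipc.h>
    #include <sys/shm.h>
    #include <unistd.h>

    int main(void)
    {
            void *base;
            int shmid;

            shmid = shmget(getpid(), 4096, IPC_CREAT | IPC_EXCL | 0700);
            if (shmid < 0) {
                    perror("shmget");
                    return 1;
            }
            base = shmat(shmid, NULL, 0);
            if (base == (void *) -1) {
                    perror("shmat");
                    return 1;
            }
            /*
             * Mark the segment for destruction right away: it stays usable by
             * the processes attached to it (and, on Linux, can still be attached
             * through the same id), and is destroyed once the last user detaches.
             */
            if (shmctl(shmid, IPC_RMID, NULL) == -1) {
                    perror("shmctl");
                    return 1;
            }
            /* ... lay out the ring buffer structures inside 'base' ... */
            return shmdt(base) == -1;
    }
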
 
 static
-void channel_release(struct kref *kref)
+void channel_release(struct urcu_ref *ref)
 {
-       struct channel *chan = caa_container_of(kref, struct channel, ref);
+       struct channel *chan = caa_container_of(ref, struct channel, ref);
        channel_free(chan);
 }
 
@@ -713,13 +527,8 @@ void *channel_destroy(struct channel *chan)
        channel_unregister_notifiers(chan);
 
        if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
-               /*
-                * No need to hold cpu hotplug, because all notifiers have been
-                * unregistered.
-                */
                for_each_channel_cpu(cpu, chan) {
-                       struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf,
-                                                             cpu);
+                       struct lib_ring_buffer *buf = &shmp(chan->backend.buf)[cpu];
 
                        if (config->cb.buffer_finalize)
                                config->cb.buffer_finalize(buf,
@@ -730,12 +539,12 @@ void *channel_destroy(struct channel *chan)
                        /*
                         * Perform flush before writing to finalized.
                         */
-                       smp_wmb();
+                       cmm_smp_wmb();
                        CMM_ACCESS_ONCE(buf->finalized) = 1;
-                       wake_up_interruptible(&buf->read_wait);
+                       //wake_up_interruptible(&buf->read_wait);
                }
        } else {
-               struct lib_ring_buffer *buf = chan->backend.buf;
+               struct lib_ring_buffer *buf = shmp(chan->backend.buf);
 
                if (config->cb.buffer_finalize)
                        config->cb.buffer_finalize(buf, chan->backend.priv, -1);
@@ -744,62 +553,47 @@ void *channel_destroy(struct channel *chan)
                /*
                 * Perform flush before writing to finalized.
                 */
-               smp_wmb();
+               cmm_smp_wmb();
                CMM_ACCESS_ONCE(buf->finalized) = 1;
-               wake_up_interruptible(&buf->read_wait);
+               //wake_up_interruptible(&buf->read_wait);
        }
        CMM_ACCESS_ONCE(chan->finalized) = 1;
-       wake_up_interruptible(&chan->hp_wait);
-       wake_up_interruptible(&chan->read_wait);
-       kref_put(&chan->ref, channel_release);
+       //wake_up_interruptible(&chan->hp_wait);
+       //wake_up_interruptible(&chan->read_wait);
+       urcu_ref_put(&chan->ref, channel_release);
        priv = chan->backend.priv;
        return priv;
 }
-EXPORT_SYMBOL_GPL(channel_destroy);
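
The kernel kref is replaced by liburcu's urcu_ref: channel_create() initializes the
count, each reader takes a reference in lib_ring_buffer_open_read(), and
channel_release() runs from whichever urcu_ref_put() drops the last reference, be it
channel_destroy() or the last reader. A small self-contained sketch of that lifetime
pattern, with illustrative names only:

    #include <stdlib.h>
    #include <urcu/compiler.h>      /* caa_container_of */
    #include <urcu/ref.h>

    struct example_chan {
            struct urcu_ref ref;
            /* ... channel state ... */
    };

    static void example_release(struct urcu_ref *ref)
    {
            struct example_chan *chan =
                    caa_container_of(ref, struct example_chan, ref);

            free(chan);
    }

    static struct example_chan *example_create(void)
    {
            struct example_chan *chan = calloc(1, sizeof(*chan));

            if (chan)
                    urcu_ref_init(&chan->ref);      /* refcount starts at 1 */
            return chan;
    }

    /* A reader brackets its access with urcu_ref_get(&chan->ref) and
     * urcu_ref_put(&chan->ref, example_release); the owner drops its initial
     * reference the same way, and whoever puts last triggers example_release(). */
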
 
 struct lib_ring_buffer *channel_get_ring_buffer(
                                        const struct lib_ring_buffer_config *config,
                                        struct channel *chan, int cpu)
 {
        if (config->alloc == RING_BUFFER_ALLOC_GLOBAL)
-               return chan->backend.buf;
+               return shmp(chan->backend.buf);
        else
-               return per_cpu_ptr(chan->backend.buf, cpu);
+               return &shmp(chan->backend.buf)[cpu];
 }
-EXPORT_SYMBOL_GPL(channel_get_ring_buffer);
 
 int lib_ring_buffer_open_read(struct lib_ring_buffer *buf)
 {
-       struct channel *chan = buf->backend.chan;
+       struct channel *chan = shmp(buf->backend.chan);
 
-       if (!atomic_long_add_unless(&buf->active_readers, 1, 1))
+       if (uatomic_cmpxchg(&buf->active_readers, 0, 1) != 0)
                return -EBUSY;
-       kref_get(&chan->ref);
-       smp_mb__after_atomic_inc();
+       urcu_ref_get(&chan->ref);
+       cmm_smp_mb();
        return 0;
 }
-EXPORT_SYMBOL_GPL(lib_ring_buffer_open_read);
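
Kernel atomic_long_* operations become liburcu uatomic_* operations on plain counters
in shared memory. lib_ring_buffer_open_read() above and lib_ring_buffer_release_read()
just below form a single-reader gate: the first caller flips active_readers from 0 to 1
with a compare-and-swap, everyone else gets -EBUSY. A condensed sketch of the same
idiom, with illustrative names:

    #include <errno.h>
    #include <urcu/arch.h>          /* cmm_smp_mb() */
    #include <urcu/uatomic.h>

    static unsigned long active_readers;

    static int open_read_sketch(void)
    {
            if (uatomic_cmpxchg(&active_readers, 0, 1) != 0)
                    return -EBUSY;  /* another reader already holds it */
            cmm_smp_mb();           /* pairs with the barrier in release */
            return 0;
    }

    static void release_read_sketch(void)
    {
            cmm_smp_mb();
            uatomic_dec(&active_readers);
    }
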
 
 void lib_ring_buffer_release_read(struct lib_ring_buffer *buf)
 {
-       struct channel *chan = buf->backend.chan;
-
-       CHAN_WARN_ON(chan, atomic_long_read(&buf->active_readers) != 1);
-       smp_mb__before_atomic_dec();
-       atomic_long_dec(&buf->active_readers);
-       kref_put(&chan->ref, channel_release);
-}
-EXPORT_SYMBOL_GPL(lib_ring_buffer_release_read);
+       struct channel *chan = shmp(buf->backend.chan);
 
-/*
- * Promote compiler barrier to a smp_mb().
- * For the specific ring buffer case, this IPI call should be removed if the
- * architecture does not reorder writes.  This should eventually be provided by
- * a separate architecture-specific infrastructure.
- */
-static void remote_mb(void *info)
-{
-       smp_mb();
+       CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1);
+       cmm_smp_mb();
+       uatomic_dec(&buf->active_readers);
+       urcu_ref_put(&chan->ref, channel_release);
 }
 
 /**
@@ -810,24 +604,22 @@ static void remote_mb(void *info)
  *
  * Returns -ENODATA if buffer is finalized, -EAGAIN if there is currently no
  * data to read at consumed position, or 0 if the get operation succeeds.
- * Busy-loop trying to get data if the tick_nohz sequence lock is held.
  */
 
 int lib_ring_buffer_snapshot(struct lib_ring_buffer *buf,
                             unsigned long *consumed, unsigned long *produced)
 {
-       struct channel *chan = buf->backend.chan;
+       struct channel *chan = shmp(buf->backend.chan);
        const struct lib_ring_buffer_config *config = chan->backend.config;
        unsigned long consumed_cur, write_offset;
        int finalized;
 
-retry:
        finalized = CMM_ACCESS_ONCE(buf->finalized);
        /*
         * Read finalized before counters.
         */
-       smp_rmb();
-       consumed_cur = atomic_long_read(&buf->consumed);
+       cmm_smp_rmb();
+       consumed_cur = uatomic_read(&buf->consumed);
        /*
         * No need to issue a memory barrier between consumed count read and
         * write offset read, because consumed count can only change
@@ -858,12 +650,9 @@ nodata:
         */
        if (finalized)
                return -ENODATA;
-       else if (raw_spin_is_locked(&buf->raw_tick_nohz_spinlock))
-               goto retry;
        else
                return -EAGAIN;
 }
-EXPORT_SYMBOL_GPL(lib_ring_buffer_snapshot);
 
 /**
  * lib_ring_buffer_put_snapshot - move consumed counter forward
@@ -874,22 +663,21 @@ void lib_ring_buffer_move_consumer(struct lib_ring_buffer *buf,
                                   unsigned long consumed_new)
 {
        struct lib_ring_buffer_backend *bufb = &buf->backend;
-       struct channel *chan = bufb->chan;
+       struct channel *chan = shmp(bufb->chan);
        unsigned long consumed;
 
-       CHAN_WARN_ON(chan, atomic_long_read(&buf->active_readers) != 1);
+       CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1);
 
        /*
         * Only push the consumed value forward.
         * If the consumed cmpxchg fails, this is because we have been pushed by
         * the writer in flight recorder mode.
         */
-       consumed = atomic_long_read(&buf->consumed);
+       consumed = uatomic_read(&buf->consumed);
        while ((long) consumed - (long) consumed_new < 0)
-               consumed = atomic_long_cmpxchg(&buf->consumed, consumed,
-                                              consumed_new);
+               consumed = uatomic_cmpxchg(&buf->consumed, consumed,
+                                          consumed_new);
 }
-EXPORT_SYMBOL_GPL(lib_ring_buffer_move_consumer);
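
lib_ring_buffer_move_consumer() only ever pushes the consumed counter forward: the
cmpxchg loop retries when the writer (in flight-recorder mode) has already pushed it
further, and stops once the current value is at or beyond the requested one. The loop
below shows the same monotonic-advance idiom in isolation, with illustrative names:

    #include <urcu/uatomic.h>

    static unsigned long consumed;

    /* Advance 'consumed' to at least 'target', never moving it backwards,
     * even if other threads are advancing it concurrently. */
    static void move_consumer_sketch(unsigned long target)
    {
            unsigned long old = uatomic_read(&consumed);

            while ((long) (old - target) < 0) {
                    unsigned long seen = uatomic_cmpxchg(&consumed, old, target);

                    if (seen == old)
                            break;  /* we advanced it ourselves */
                    old = seen;     /* someone else moved it; re-check */
            }
    }
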
 
 /**
  * lib_ring_buffer_get_subbuf - get exclusive access to subbuffer for reading
@@ -898,12 +686,11 @@ EXPORT_SYMBOL_GPL(lib_ring_buffer_move_consumer);
  *
  * Returns -ENODATA if buffer is finalized, -EAGAIN if there is currently no
  * data to read at consumed position, or 0 if the get operation succeeds.
- * Busy-loop trying to get data if the tick_nohz sequence lock is held.
  */
 int lib_ring_buffer_get_subbuf(struct lib_ring_buffer *buf,
                               unsigned long consumed)
 {
-       struct channel *chan = buf->backend.chan;
+       struct channel *chan = shmp(buf->backend.chan);
        const struct lib_ring_buffer_config *config = chan->backend.config;
        unsigned long consumed_cur, consumed_idx, commit_count, write_offset;
        int ret;
@@ -914,72 +701,21 @@ retry:
        /*
         * Read finalized before counters.
         */
-       smp_rmb();
-       consumed_cur = atomic_long_read(&buf->consumed);
+       cmm_smp_rmb();
+       consumed_cur = uatomic_read(&buf->consumed);
        consumed_idx = subbuf_index(consumed, chan);
-       commit_count = v_read(config, &buf->commit_cold[consumed_idx].cc_sb);
+       commit_count = v_read(config, &shmp(buf->commit_cold)[consumed_idx].cc_sb);
        /*
         * Make sure we read the commit count before reading the buffer
         * data and the write offset. Correct consumed offset ordering
         * wrt commit count is insured by the use of cmpxchg to update
         * the consumed offset.
-        * smp_call_function_single can fail if the remote CPU is offline,
-        * this is OK because then there is no wmb to execute there.
-        * If our thread is executing on the same CPU as the on the buffers
-        * belongs to, we don't have to synchronize it at all. If we are
-        * migrated, the scheduler will take care of the memory barriers.
-        * Normally, smp_call_function_single() should ensure program order when
-        * executing the remote function, which implies that it surrounds the
-        * function execution with :
-        * smp_mb()
-        * send IPI
-        * csd_lock_wait
-        *                recv IPI
-        *                smp_mb()
-        *                exec. function
-        *                smp_mb()
-        *                csd unlock
-        * smp_mb()
-        *
-        * However, smp_call_function_single() does not seem to clearly execute
-        * such barriers. It depends on spinlock semantic to provide the barrier
-        * before executing the IPI and, when busy-looping, csd_lock_wait only
-        * executes smp_mb() when it has to wait for the other CPU.
-        *
-        * I don't trust this code. Therefore, let's add the smp_mb() sequence
-        * required ourself, even if duplicated. It has no performance impact
-        * anyway.
-        *
-        * smp_mb() is needed because smp_rmb() and smp_wmb() only order read vs
-        * read and write vs write. They do not ensure core synchronization. We
-        * really have to ensure total order between the 3 barriers running on
-        * the 2 CPUs.
         */
-       if (config->ipi == RING_BUFFER_IPI_BARRIER) {
-               if (config->sync == RING_BUFFER_SYNC_PER_CPU
-                   && config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
-                       if (raw_smp_processor_id() != buf->backend.cpu) {
-                               /* Total order with IPI handler smp_mb() */
-                               smp_mb();
-                               smp_call_function_single(buf->backend.cpu,
-                                                        remote_mb, NULL, 1);
-                               /* Total order with IPI handler smp_mb() */
-                               smp_mb();
-                       }
-               } else {
-                       /* Total order with IPI handler smp_mb() */
-                       smp_mb();
-                       smp_call_function(remote_mb, NULL, 1);
-                       /* Total order with IPI handler smp_mb() */
-                       smp_mb();
-               }
-       } else {
-               /*
-                * Local rmb to match the remote wmb to read the commit count
-                * before the buffer data and the write offset.
-                */
-               smp_rmb();
-       }
+       /*
+        * Local rmb to match the remote wmb to read the commit count
+        * before the buffer data and the write offset.
+        */
+       cmm_smp_rmb();
 
        write_offset = v_read(config, &buf->offset);
 
@@ -1035,12 +771,9 @@ nodata:
         */
        if (finalized)
                return -ENODATA;
-       else if (raw_spin_is_locked(&buf->raw_tick_nohz_spinlock))
-               goto retry;
        else
                return -EAGAIN;
 }
-EXPORT_SYMBOL_GPL(lib_ring_buffer_get_subbuf);
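
With the kernel IPI-based barrier scheme gone, ordering between the writer's payload
and its commit-count update relies purely on a cmm_smp_wmb()/cmm_smp_rmb() pair: the
writer publishes the data before bumping the commit count, and the reader (as in
lib_ring_buffer_get_subbuf() above) reads the commit count before touching the data.
A minimal single-slot producer/consumer sketch of that pairing, with hypothetical
names; callers are assumed to keep len within the slot size:

    #include <string.h>
    #include <urcu/arch.h>          /* cmm_smp_wmb(), cmm_smp_rmb() */
    #include <urcu/uatomic.h>

    static char slot[64];
    static unsigned long commit_count;

    /* Writer: publish the payload bytes, then advance the commit count. */
    static void writer_sketch(const char *msg, size_t len)
    {
            memcpy(slot, msg, len);
            cmm_smp_wmb();          /* order payload before commit count */
            uatomic_add(&commit_count, len);
    }

    /* Reader: sample the commit count, then read at most that much payload. */
    static size_t reader_sketch(char *dest, size_t len)
    {
            size_t cc = uatomic_read(&commit_count);

            cmm_smp_rmb();          /* order commit count before payload */
            if (len > cc)
                    len = cc;
            memcpy(dest, slot, len);
            return len;
    }
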
 
 /**
  * lib_ring_buffer_put_subbuf - release exclusive subbuffer access
@@ -1049,11 +782,11 @@ EXPORT_SYMBOL_GPL(lib_ring_buffer_get_subbuf);
 void lib_ring_buffer_put_subbuf(struct lib_ring_buffer *buf)
 {
        struct lib_ring_buffer_backend *bufb = &buf->backend;
-       struct channel *chan = bufb->chan;
+       struct channel *chan = shmp(bufb->chan);
        const struct lib_ring_buffer_config *config = chan->backend.config;
        unsigned long read_sb_bindex, consumed_idx, consumed;
 
-       CHAN_WARN_ON(chan, atomic_long_read(&buf->active_readers) != 1);
+       CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1);
 
        if (!buf->get_subbuf) {
                /*
@@ -1074,9 +807,9 @@ void lib_ring_buffer_put_subbuf(struct lib_ring_buffer *buf)
         */
        read_sb_bindex = subbuffer_id_get_index(config, bufb->buf_rsb.id);
        v_add(config, v_read(config,
-                            &bufb->array[read_sb_bindex]->records_unread),
+                            &shmp(bufb->array)[read_sb_bindex]->records_unread),
              &bufb->records_read);
-       v_set(config, &bufb->array[read_sb_bindex]->records_unread, 0);
+       v_set(config, &shmp(bufb->array)[read_sb_bindex]->records_unread, 0);
        CHAN_WARN_ON(chan, config->mode == RING_BUFFER_OVERWRITE
                     && subbuffer_id_is_noref(config, bufb->buf_rsb.id));
        subbuffer_id_set_noref(config, &bufb->buf_rsb.id);
@@ -1097,7 +830,6 @@ void lib_ring_buffer_put_subbuf(struct lib_ring_buffer *buf)
         * if the writer concurrently updated it.
         */
 }
-EXPORT_SYMBOL_GPL(lib_ring_buffer_put_subbuf);
 
 /*
  * cons_offset is an iterator on all subbuffer offsets between the reader
@@ -1113,12 +845,11 @@ void lib_ring_buffer_print_subbuffer_errors(struct lib_ring_buffer *buf,
        unsigned long cons_idx, commit_count, commit_count_sb;
 
        cons_idx = subbuf_index(cons_offset, chan);
-       commit_count = v_read(config, &buf->commit_hot[cons_idx].cc);
-       commit_count_sb = v_read(config, &buf->commit_cold[cons_idx].cc_sb);
+       commit_count = v_read(config, &shmp(buf->commit_hot)[cons_idx].cc);
+       commit_count_sb = v_read(config, &shmp(buf->commit_cold)[cons_idx].cc_sb);
 
        if (subbuf_offset(commit_count, chan) != 0)
-               printk(KERN_WARNING
-                      "ring buffer %s, cpu %d: "
+               ERRMSG("ring buffer %s, cpu %d: "
                       "commit count in subbuffer %lu,\n"
                       "expecting multiples of %lu bytes\n"
                       "  [ %lu bytes committed, %lu bytes reader-visible ]\n",
@@ -1126,7 +857,7 @@ void lib_ring_buffer_print_subbuffer_errors(struct lib_ring_buffer *buf,
                       chan->backend.subbuf_size,
                       commit_count, commit_count_sb);
 
-       printk(KERN_DEBUG "ring buffer: %s, cpu %d: %lu bytes committed\n",
+       ERRMSG("ring buffer: %s, cpu %d: %lu bytes committed\n",
               chan->backend.name, cpu, commit_count);
 }
 
@@ -1150,15 +881,14 @@ void lib_ring_buffer_print_buffer_errors(struct lib_ring_buffer *buf,
         * references are left.
         */
        write_offset = v_read(config, &buf->offset);
-       cons_offset = atomic_long_read(&buf->consumed);
+       cons_offset = uatomic_read(&buf->consumed);
        if (write_offset != cons_offset)
-               printk(KERN_WARNING
-                      "ring buffer %s, cpu %d: "
+               ERRMSG("ring buffer %s, cpu %d: "
                       "non-consumed data\n"
                       "  [ %lu bytes written, %lu bytes read ]\n",
                       chan->backend.name, cpu, write_offset, cons_offset);
 
-       for (cons_offset = atomic_long_read(&buf->consumed);
+       for (cons_offset = uatomic_read(&buf->consumed);
             (long) (subbuf_trunc((unsigned long) v_read(config, &buf->offset),
                                  chan)
                     - cons_offset) > 0;
@@ -1174,7 +904,7 @@ void lib_ring_buffer_print_errors(struct channel *chan,
        const struct lib_ring_buffer_config *config = chan->backend.config;
        void *priv = chan->backend.priv;
 
-       printk(KERN_DEBUG "ring buffer %s, cpu %d: %lu records written, "
+       ERRMSG("ring buffer %s, cpu %d: %lu records written, "
                          "%lu records overrun\n",
                          chan->backend.name, cpu,
                          v_read(config, &buf->records_count),
@@ -1183,8 +913,7 @@ void lib_ring_buffer_print_errors(struct channel *chan,
        if (v_read(config, &buf->records_lost_full)
            || v_read(config, &buf->records_lost_wrap)
            || v_read(config, &buf->records_lost_big))
-               printk(KERN_WARNING
-                      "ring buffer %s, cpu %d: records were lost. Caused by:\n"
+               ERRMSG("ring buffer %s, cpu %d: records were lost. Caused by:\n"
                       "  [ %lu buffer full, %lu nest buffer wrap-around, "
                       "%lu event too big ]\n",
                       chan->backend.name, cpu,
@@ -1216,18 +945,10 @@ void lib_ring_buffer_switch_old_start(struct lib_ring_buffer *buf,
         * Order all writes to buffer before the commit count update that will
         * determine that the subbuffer is full.
         */
-       if (config->ipi == RING_BUFFER_IPI_BARRIER) {
-               /*
-                * Must write slot data before incrementing commit count.  This
-                * compiler barrier is upgraded into a smp_mb() by the IPI sent
-                * by get_subbuf().
-                */
-               barrier();
-       } else
-               smp_wmb();
+       cmm_smp_wmb();
        v_add(config, config->cb.subbuffer_header_size(),
-             &buf->commit_hot[oldidx].cc);
-       commit_count = v_read(config, &buf->commit_hot[oldidx].cc);
+             &shmp(buf->commit_hot)[oldidx].cc);
+       commit_count = v_read(config, &shmp(buf->commit_hot)[oldidx].cc);
        /* Check if the written buffer has to be delivered */
        lib_ring_buffer_check_deliver(config, buf, chan, offsets->old,
                                      commit_count, oldidx);
@@ -1262,17 +983,9 @@ void lib_ring_buffer_switch_old_end(struct lib_ring_buffer *buf,
         * Order all writes to buffer before the commit count update that will
         * determine that the subbuffer is full.
         */
-       if (config->ipi == RING_BUFFER_IPI_BARRIER) {
-               /*
-                * Must write slot data before incrementing commit count.  This
-                * compiler barrier is upgraded into a smp_mb() by the IPI sent
-                * by get_subbuf().
-                */
-               barrier();
-       } else
-               smp_wmb();
-       v_add(config, padding_size, &buf->commit_hot[oldidx].cc);
-       commit_count = v_read(config, &buf->commit_hot[oldidx].cc);
+       cmm_smp_wmb();
+       v_add(config, padding_size, &shmp(buf->commit_hot)[oldidx].cc);
+       commit_count = v_read(config, &shmp(buf->commit_hot)[oldidx].cc);
        lib_ring_buffer_check_deliver(config, buf, chan, offsets->old - 1,
                                      commit_count, oldidx);
        lib_ring_buffer_write_commit_counter(config, buf, chan, oldidx,
@@ -1303,18 +1016,10 @@ void lib_ring_buffer_switch_new_start(struct lib_ring_buffer *buf,
         * Order all writes to buffer before the commit count update that will
         * determine that the subbuffer is full.
         */
-       if (config->ipi == RING_BUFFER_IPI_BARRIER) {
-               /*
-                * Must write slot data before incrementing commit count.  This
-                * compiler barrier is upgraded into a smp_mb() by the IPI sent
-                * by get_subbuf().
-                */
-               barrier();
-       } else
-               smp_wmb();
+       cmm_smp_wmb();
        v_add(config, config->cb.subbuffer_header_size(),
-             &buf->commit_hot[beginidx].cc);
-       commit_count = v_read(config, &buf->commit_hot[beginidx].cc);
+             &shmp(buf->commit_hot)[beginidx].cc);
+       commit_count = v_read(config, &shmp(buf->commit_hot)[beginidx].cc);
        /* Check if the written buffer has to be delivered */
        lib_ring_buffer_check_deliver(config, buf, chan, offsets->begin,
                                      commit_count, beginidx);
@@ -1347,17 +1052,9 @@ void lib_ring_buffer_switch_new_end(struct lib_ring_buffer *buf,
         * Order all writes to buffer before the commit count update that will
         * determine that the subbuffer is full.
         */
-       if (config->ipi == RING_BUFFER_IPI_BARRIER) {
-               /*
-                * Must write slot data before incrementing commit count.  This
-                * compiler barrier is upgraded into a smp_mb() by the IPI sent
-                * by get_subbuf().
-                */
-               barrier();
-       } else
-               smp_wmb();
-       v_add(config, padding_size, &buf->commit_hot[endidx].cc);
-       commit_count = v_read(config, &buf->commit_hot[endidx].cc);
+       cmm_smp_wmb();
+       v_add(config, padding_size, &shmp(buf->commit_hot)[endidx].cc);
+       commit_count = v_read(config, &shmp(buf->commit_hot)[endidx].cc);
        lib_ring_buffer_check_deliver(config, buf, chan, offsets->end - 1,
                                  commit_count, endidx);
        lib_ring_buffer_write_commit_counter(config, buf, chan, endidx,
@@ -1398,10 +1095,10 @@ int lib_ring_buffer_try_switch_slow(enum switch_mode mode,
         * The next record that reserves space will be responsible for
         * populating the following subbuffer header. We choose not to populate
         * the next subbuffer header here because we want to be able to use
-        * SWITCH_ACTIVE for periodical buffer flush and CPU tick_nohz stop
-        * buffer flush, which must guarantee that all the buffer content
-        * (records and header timestamps) are visible to the reader. This is
-        * required for quiescence guarantees for the fusion merge.
+        * SWITCH_ACTIVE for periodical buffer flush, which must
+        * guarantee that all the buffer content (records and header
+        * timestamps) are visible to the reader. This is required for
+        * quiescence guarantees for the fusion merge.
         */
        if (mode == SWITCH_FLUSH || off > 0) {
                if (unlikely(off == 0)) {
@@ -1435,7 +1132,7 @@ int lib_ring_buffer_try_switch_slow(enum switch_mode mode,
  */
 void lib_ring_buffer_switch_slow(struct lib_ring_buffer *buf, enum switch_mode mode)
 {
-       struct channel *chan = buf->backend.chan;
+       struct channel *chan = shmp(buf->backend.chan);
        const struct lib_ring_buffer_config *config = chan->backend.config;
        struct switch_offsets offsets;
        unsigned long oldidx;
@@ -1482,7 +1179,6 @@ void lib_ring_buffer_switch_slow(struct lib_ring_buffer *buf, enum switch_mode m
         */
        lib_ring_buffer_switch_old_end(buf, chan, &offsets, tsc);
 }
-EXPORT_SYMBOL_GPL(lib_ring_buffer_switch_slow);
 
 /*
  * Returns :
@@ -1547,14 +1243,14 @@ int lib_ring_buffer_try_reserve_slow(struct lib_ring_buffer *buf,
                  (buf_trunc(offsets->begin, chan)
                   >> chan->backend.num_subbuf_order)
                  - ((unsigned long) v_read(config,
-                                           &buf->commit_cold[sb_index].cc_sb)
+                                           &shmp(buf->commit_cold)[sb_index].cc_sb)
                     & chan->commit_count_mask);
                if (likely(reserve_commit_diff == 0)) {
                        /* Next subbuffer not being written to. */
                        if (unlikely(config->mode != RING_BUFFER_OVERWRITE &&
                                subbuf_trunc(offsets->begin, chan)
                                 - subbuf_trunc((unsigned long)
-                                    atomic_long_read(&buf->consumed), chan)
+                                    uatomic_read(&buf->consumed), chan)
                                >= chan->backend.buf_size)) {
                                /*
                                 * We do not overwrite non consumed buffers
@@ -1638,9 +1334,9 @@ int lib_ring_buffer_reserve_slow(struct lib_ring_buffer_ctx *ctx)
        int ret;
 
        if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
-               buf = per_cpu_ptr(chan->backend.buf, ctx->cpu);
+               buf = &shmp(chan->backend.buf)[ctx->cpu];
        else
-               buf = chan->backend.buf;
+               buf = shmp(chan->backend.buf);
        ctx->buf = buf;
 
        offsets.size = 0;
@@ -1696,4 +1392,3 @@ int lib_ring_buffer_reserve_slow(struct lib_ring_buffer_ctx *ctx)
        ctx->buf_offset = offsets.begin + offsets.pre_header_padding;
        return 0;
 }
-EXPORT_SYMBOL_GPL(lib_ring_buffer_reserve_slow);
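All four switch/commit hunks above converge on the same publish protocol: write the slot data, issue cmm_smp_wmb(), then bump the hot commit count; get_subbuf() pairs that with the cmm_smp_rmb() seen earlier before it reads the write offset. The stand-alone sketch below illustrates the pairing with liburcu primitives; the demo_subbuf type and function names are invented for illustration (they are not the ring buffer's real structures), and the header names assume a current liburcu install.

	#include <stddef.h>
	#include <string.h>
	#include <urcu/arch.h>		/* cmm_smp_wmb(), cmm_smp_rmb() */
	#include <urcu/uatomic.h>	/* uatomic_add(), uatomic_read() */

	/* Illustrative stand-in for one sub-buffer: payload plus a commit count. */
	struct demo_subbuf {
		char data[4096];
		unsigned long commit;		/* bytes published so far */
	};

	/* Writer side: fill the slot, then publish it by bumping the commit count. */
	static void demo_commit(struct demo_subbuf *sb, size_t off,
				const void *src, size_t len)
	{
		memcpy(sb->data + off, src, len);
		cmm_smp_wmb();			/* order payload stores before the count update */
		uatomic_add(&sb->commit, len);
	}

	/* Reader side: load the commit count, then read only the bytes it covers. */
	static size_t demo_read(struct demo_subbuf *sb, void *dst)
	{
		size_t avail = uatomic_read(&sb->commit);

		cmm_smp_rmb();			/* order the count load before payload loads */
		memcpy(dst, sb->data, avail);
		return avail;
	}

The unconditional cmm_smp_wmb() replaces the kernel's RING_BUFFER_IPI_BARRIER special case removed in these hunks, presumably because a user-space reader cannot send the IPI that used to upgrade the compiler barrier into a full smp_mb().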
diff --git a/libringbuffer/shm.h b/libringbuffer/shm.h
new file mode 100644 (file)
index 0000000..390a5b2
--- /dev/null
@@ -0,0 +1,73 @@
+#ifndef _LIBRINGBUFFER_SHM_H
+#define _LIBRINGBUFFER_SHM_H
+
+/*
+ * libringbuffer/shm.h
+ *
+ * Copyright 2011 (c) - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * Dual LGPL v2.1/GPL v2 license.
+ */
+
+#include <stdint.h>
+#include <stddef.h>	/* ptrdiff_t */
+#include <assert.h>	/* assert() used by _set_shmp() */
+#include "ust/core.h"
+
+#define SHM_MAGIC      0x54335433
+#define SHM_MAJOR      0
+#define SHM_MINOR      1
+
+/*
+ * Defining a max shm offset, for debugging purposes.
+ */
+#if (CAA_BITS_PER_LONG == 32)
+/* Define the maximum shared memory size to 128MB on 32-bit machines */
+#define MAX_SHM_SIZE   134217728
+#else
+/* Define the maximum shared memory size to 8GB on 64-bit machines */
+#define MAX_SHM_SIZE   8589934592
+#endif
+
+#define DECLARE_SHMP(type, name)       type *****name
+
+struct shm_header {
+       uint32_t magic;
+       uint8_t major;
+       uint8_t minor;
+       uint8_t bits_per_long;
+       size_t shm_size, shm_allocated;
+
+       DECLARE_SHMP(struct channel, chan);
+};
+
+#define shmp(shm_offset)               \
+       ((__typeof__(****(shm_offset))) (((char *) &(shm_offset)) + (ptrdiff_t) (shm_offset)))
+
+#define _shmp_abs(a)	(((a) < 0) ? -(a) : (a))
+
+static inline
+void _set_shmp(ptrdiff_t *shm_offset, void *ptr)
+{
+       *shm_offset = (((char *) ptr) - ((char *) shm_offset));
+       assert(_shmp_abs(*shm_offset) < MAX_SHM_SIZE);
+}
+
+#define set_shmp(shm_offset, ptr)      \
+       _set_shmp((ptrdiff_t *) ****(shm_offset), ptr)
+
+/* Shared memory is already zeroed by shmget */
+/* *NOT* multithread-safe (should be protected by mutex) */
+static inline
+void *zalloc_shm(struct shm_header *shm_header, size_t len)
+{
+       void *ret;
+
+       if (shm_header->shm_size - shm_header->shm_allocated < len)
+               return NULL;
+       ret = (char *) shm_header + shm_header->shm_allocated;
+       shm_header->shm_allocated += len;
+       return ret;
+}
+
+#endif /* _LIBRINGBUFFER_SHM_H */
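The shmp()/set_shmp() pair above never stores an absolute pointer: what is written into shared memory is the target's distance from the field's own address, and zalloc_shm() hands out such objects from a simple bump allocator inside the mapping. A minimal self-relative pointer sketch, using a plain ptrdiff_t field instead of the DECLARE_SHMP() typing trick (all names here are illustrative only):

	#include <assert.h>
	#include <stddef.h>
	#include <stdio.h>

	/* A self-relative reference: the stored value is "target minus my own address". */
	struct demo_ref {
		ptrdiff_t offset;
	};

	/* Pretend this block is the shared-memory mapping: it holds only offsets
	 * and payload, never absolute pointers, so every process can map it
	 * anywhere. */
	static struct demo_shm {
		struct demo_ref chan_ref;
		char payload[64];
	} shm;

	static void demo_set(struct demo_ref *ref, void *target)
	{
		ref->offset = (char *) target - (char *) ref;
	}

	static void *demo_get(struct demo_ref *ref)
	{
		return (char *) ref + ref->offset;
	}

	int main(void)
	{
		demo_set(&shm.chan_ref, shm.payload);
		assert(demo_get(&shm.chan_ref) == shm.payload);
		printf("stored offset: %td\n", shm.chan_ref.offset);
		return 0;
	}

Because the offset is taken relative to the field itself rather than to the mapping base, a reference resolves correctly in every process regardless of where the segment happens to be mapped, which is exactly what per-channel shm structures need.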
diff --git a/libringbuffer/smp.c b/libringbuffer/smp.c
new file mode 100644 (file)
index 0000000..3f86ac8
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * libringbuffer/smp.c
+ *
+ * Copyright 2011 (c) - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * Dual LGPL v2.1/GPL v2 license.
+ */
+
+#include <unistd.h>
+#include "ust/core.h"
+#include "usterr.h"
+#include <pthread.h>
+#include "smp.h"
+
+int __num_possible_cpus;
+
+void _get_num_possible_cpus(void)
+{
+       int result;
+
+       /* On Linux, when some processors are offline
+        * _SC_NPROCESSORS_CONF counts the offline
+        * processors, whereas _SC_NPROCESSORS_ONLN
+        * does not. If we used _SC_NPROCESSORS_ONLN,
+        * getcpu() could return a value greater than
+        * this sysconf, in which case the arrays
+        * indexed by processor would overflow.
+        */
+       result = sysconf(_SC_NPROCESSORS_CONF);
+       if (result == -1)
+               return;
+       __num_possible_cpus = result;
+}
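As the comment explains, _SC_NPROCESSORS_CONF counts configured (possibly offline) processors, so arrays sized from it can safely be indexed by whatever sched_getcpu() returns. A small, hypothetical sizing example under that assumption (not part of the library):

	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>

	int main(void)
	{
		long ncpus = sysconf(_SC_NPROCESSORS_CONF);
		unsigned long *per_cpu_count;

		if (ncpus < 1)
			ncpus = 1;	/* fall back to one CPU; the library itself just leaves __num_possible_cpus unset on error */
		per_cpu_count = calloc(ncpus, sizeof(*per_cpu_count));
		if (!per_cpu_count)
			return 1;
		printf("sized per-cpu array for %ld possible cpus\n", ncpus);
		free(per_cpu_count);
		return 0;
	}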
diff --git a/libringbuffer/smp.h b/libringbuffer/smp.h
new file mode 100644 (file)
index 0000000..3d138a9
--- /dev/null
@@ -0,0 +1,74 @@
+#ifndef _LIBRINGBUFFER_SMP_H
+#define _LIBRINGBUFFER_SMP_H
+
+/*
+ * libringbuffer/smp.h
+ *
+ * Copyright 2011 (c) - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * Dual LGPL v2.1/GPL v2 license.
+ */
+
+#include <sched.h>	/* sched_getcpu() */
+#include <ust/core.h>
+
+/*
+ * 4kB of per-cpu data available. Enough to hold the control structures,
+ * but not ring buffers.
+ */
+#define PER_CPU_MEM_SIZE       4096
+
+extern int __num_possible_cpus;
+extern void _get_num_possible_cpus(void);
+
+static inline
+int num_possible_cpus(void)
+{
+       if (!__num_possible_cpus)
+               _get_num_possible_cpus();
+       return __num_possible_cpus;
+}
+
+/*
+ * get_cpu() returns the current CPU number. It may change due to
+ * migration, so it is only statistically accurate.
+ */
+#ifndef UST_VALGRIND
+static inline
+int get_cpu(void)
+{
+       int cpu;
+
+       cpu = sched_getcpu();
+       if (likely(cpu >= 0))
+               return cpu;
+       /*
+        * If getcpu(2) is not implemented in the Kernel use CPU 0 as fallback.
+        */
+       return 0;
+}
+
+#else  /* #else #ifndef UST_VALGRIND */
+static inline
+int get_cpu(void)
+{
+       /*
+        * Valgrind does not support the sched_getcpu() vsyscall;
+        * it makes Valgrind report a segfault in the program and stop it.
+        * So if we want to check the library with Valgrind, we have to
+        * refrain from using this call. TODO: it would probably be better
+        * to return other values too, to better test it.
+        */
+       return 0;
+}
+#endif /* #else #ifndef UST_VALGRIND */
+
+static inline
+void put_cpu(void)
+{
+}
+
+#define for_each_possible_cpu(cpu)             \
+       for ((cpu) = 0; (cpu) < num_possible_cpus(); (cpu)++)
+
+#endif /* _LIBRINGBUFFER_SMP_H */
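A short usage sketch for the helpers above, walking the possible CPUs and querying the current one; it assumes smp.h and its ust/ headers are on the include path and that _GNU_SOURCE is defined so <sched.h> declares sched_getcpu():

	#define _GNU_SOURCE
	#include <stdio.h>
	#include "smp.h"	/* the header added above; the include path is an assumption */

	int main(void)
	{
		int cpu;

		/* Per-CPU channel setup would walk every possible CPU like this. */
		for_each_possible_cpu(cpu)
			printf("possible cpu: %d\n", cpu);

		/* Current CPU; only statistically accurate, as noted in the header. */
		cpu = get_cpu();
		printf("running on cpu %d\n", cpu);
		put_cpu();
		return 0;
	}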
index 8ecebb9355060bb752d7f66f9f0f4f0895ac747f..0e198da1bc21e3a023e63c10156bc38f3c463fd3 100644 (file)
@@ -55,31 +55,6 @@ static CDS_LIST_HEAD(ust_buffers_channels);
 static void ltt_force_switch(struct ust_buffer *buf,
                enum force_switch_mode mode);
 
-static int get_n_cpus(void)
-{
-       int result;
-       static int n_cpus = 0;
-
-       if(!n_cpus) {
-               /* On Linux, when some processors are offline
-                * _SC_NPROCESSORS_CONF counts the offline
-                * processors, whereas _SC_NPROCESSORS_ONLN
-                * does not. If we used _SC_NPROCESSORS_ONLN,
-                * getcpu() could return a value greater than
-                * this sysconf, in which case the arrays
-                * indexed by processor would overflow.
-                */
-               result = sysconf(_SC_NPROCESSORS_CONF);
-               if(result == -1) {
-                       return -1;
-               }
-
-               n_cpus = result;
-       }
-
-       return n_cpus;
-}
-
 /**
  * _ust_buffers_strncpy_fixup - Fix an incomplete string in a ltt_relay buffer.
  * @buf : buffer
index 4f72d7ad637e5e39de20df4b386fba84dd1e497e..9fd626e8238110cbfecef07eac854b13dd9511bd 100644 (file)
@@ -345,37 +345,6 @@ static __inline__ void ltt_write_trace_header(struct ust_trace *trace,
        header->freq_scale = trace->freq_scale;
 }
 
-#ifndef UST_VALGRIND
-
-static __inline__ int ust_get_cpu(void)
-{
-       int cpu;
-
-       cpu = sched_getcpu();
-       if (likely(cpu >= 0))
-               return cpu;
-       /*
-        * If getcpu(2) is not implemented in the Kernel use CPU 0 as fallback.
-        */
-       return 0;
-}
-
-#else  /* #else #ifndef UST_VALGRIND */
-
-static __inline__ int ust_get_cpu(void)
-{
-       /*
-        * Valgrind does not support the sched_getcpu() vsyscall.
-        * It causes it to detect a segfault in the program and stop it.
-        * So if we want to check libust with valgrind, we have to refrain
-        * from using this call. TODO: it would probably be better to return
-        * other values too, to better test it.
-        */
-       return 0;
-}
-
-#endif /* #else #ifndef UST_VALGRIND */
-
 /*
  * Size reserved for high priority events (interrupts, NMI, BH) at the end of a
  * nearly full buffer. User space won't use this last amount of space when in