/*
* ring_buffer_frontend.c
*
- * (C) Copyright 2005-2010 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * Copyright (C) 2005-2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; only
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
*
* Ring buffer wait-free buffer synchronization. Producer-consumer and flight
* recorder (overwrite) modes. See thesis:
* - splice one subbuffer worth of data to a pipe
* - splice the data from pipe to disk/network
* - put_subbuf
- *
- * Dual LGPL v2.1/GPL v2 license.
*/
+#define _GNU_SOURCE
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <urcu/compiler.h>
#include <urcu/ref.h>
+#include <helper.h>
#include "smp.h"
#include <lttng/ringbuffer-config.h>
+#include "vatomic.h"
#include "backend.h"
#include "frontend.h"
#include "shm.h"
+#include "tlsfixup.h"
+#include "../liblttng-ust/compat.h" /* For ENODATA */
#ifndef max
#define max(a, b) ((a) > (b) ? (a) : (b))
#endif
+/* Print DBG() messages about events lost only every 1048576 hits */
+#define DBG_PRINT_NR_LOST (1UL << 20)
+
/*
* Use POSIX SHM: shm_open(3) and shm_unlink(3).
* close(2) to close the fd returned by shm_open.
__thread unsigned int lib_ring_buffer_nesting;
+/*
+ * TODO: this is unused. Errors are saved within the ring buffer.
+ * Eventually, allow consumerd to print these errors.
+ */
static
void lib_ring_buffer_print_errors(struct channel *chan,
struct lttng_ust_lib_ring_buffer *buf, int cpu,
- struct lttng_ust_shm_handle *handle);
-
-/*
- * Must be called under cpu hotplug protection.
- */
-void lib_ring_buffer_free(struct lttng_ust_lib_ring_buffer *buf,
- struct lttng_ust_shm_handle *handle)
-{
- struct channel *chan = shmp(handle, buf->backend.chan);
-
- lib_ring_buffer_print_errors(chan, buf, buf->backend.cpu, handle);
- /* buf->commit_hot will be freed by shm teardown */
- /* buf->commit_cold will be freed by shm teardown */
-
- lib_ring_buffer_backend_free(&buf->backend);
-}
+ struct lttng_ust_shm_handle *handle)
+ __attribute__((unused));
/**
* lib_ring_buffer_reset - Reset ring buffer to initial values.
{
const struct lttng_ust_lib_ring_buffer_config *config = &chanb->config;
struct channel *chan = caa_container_of(chanb, struct channel, backend);
- void *priv = chanb->priv;
- unsigned int num_subbuf;
+ void *priv = channel_get_private(chan);
size_t subbuf_header_size;
- u64 tsc;
+ uint64_t tsc;
int ret;
/* Test for cpu hotplug */
goto free_commit;
}
- num_subbuf = chan->backend.num_subbuf;
- //init_waitqueue_head(&buf->read_wait);
-
/*
* Write the subbuffer header for first subbuffer so we know the total
* duration of data gathering.
free_commit:
/* commit_hot will be freed by shm teardown */
free_chanbuf:
- lib_ring_buffer_backend_free(&buf->backend);
return ret;
}
* channel_create - Create channel.
* @config: ring buffer instance configuration
* @name: name of the channel
- * @priv: ring buffer client private data
+ * @priv_data: ring buffer client private data area pointer (output)
+ * @priv_data_size: length, in bytes, of the private data area.
+ * @priv_data_init: initialization data for private data.
* @buf_addr: pointer the the beginning of the preallocated buffer contiguous
* address mapping. It is used only by RING_BUFFER_STATIC
* configuration. It can be set to NULL for other backends.
* Returns NULL on failure.
*/
struct lttng_ust_shm_handle *channel_create(const struct lttng_ust_lib_ring_buffer_config *config,
- const char *name, void *priv, void *buf_addr,
- size_t subbuf_size,
+ const char *name,
+ void **priv_data,
+ size_t priv_data_align,
+ size_t priv_data_size,
+ void *priv_data_init,
+ void *buf_addr, size_t subbuf_size,
size_t num_subbuf, unsigned int switch_timer_interval,
unsigned int read_timer_interval,
- int *shm_fd, int *wait_fd, uint64_t *memory_map_size)
+ int **shm_fd, int **wait_fd, uint64_t **memory_map_size)
{
int ret, cpu;
- size_t shmsize;
+ size_t shmsize, chansize;
struct channel *chan;
struct lttng_ust_shm_handle *handle;
struct shm_object *shmobj;
shmsize += sizeof(struct lttng_ust_lib_ring_buffer_shmp) * num_possible_cpus();
else
shmsize += sizeof(struct lttng_ust_lib_ring_buffer_shmp);
+ chansize = shmsize;
+ shmsize += offset_align(shmsize, priv_data_align);
+ shmsize += priv_data_size;
shmobj = shm_object_table_append(handle->table, shmsize);
if (!shmobj)
goto error_append;
/* struct channel is at object 0, offset 0 (hardcoded) */
- set_shmp(handle->chan, zalloc_shm(shmobj, shmsize));
+ set_shmp(handle->chan, zalloc_shm(shmobj, chansize));
assert(handle->chan._ref.index == 0);
assert(handle->chan._ref.offset == 0);
chan = shmp(handle, handle->chan);
if (!chan)
goto error_append;
- ret = channel_backend_init(&chan->backend, name, config, priv,
+ /* space for private data */
+ if (priv_data_size) {
+ DECLARE_SHMP(void, priv_data_alloc);
+
+ align_shm(shmobj, priv_data_align);
+ chan->priv_data_offset = shmobj->allocated_len;
+ set_shmp(priv_data_alloc, zalloc_shm(shmobj, priv_data_size));
+ if (!shmp(handle, priv_data_alloc))
+ goto error_append;
+ *priv_data = channel_get_private(chan);
+ memcpy(*priv_data, priv_data_init, priv_data_size);
+ } else {
+ chan->priv_data_offset = -1;
+ *priv_data = NULL;
+ }
+
+ ret = channel_backend_init(&chan->backend, name, config,
subbuf_size, num_subbuf, handle);
if (ret)
goto error_backend_init;
* Call "destroy" callback, finalize channels, decrement the channel
* reference count. Note that when readers have completed data
* consumption of finalized channels, get_subbuf() will return -ENODATA.
- * They should release their handle at that point. Returns the private
- * data pointer.
+ * They should release their handle at that point.
*/
-void *channel_destroy(struct channel *chan, struct lttng_ust_shm_handle *handle,
+void channel_destroy(struct channel *chan, struct lttng_ust_shm_handle *handle,
int shadow)
{
- const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
- void *priv;
- int cpu;
-
if (shadow) {
channel_release(chan, handle, shadow);
- return NULL;
+ return;
}
channel_unregister_notifiers(chan, handle);
- if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) {
- for_each_channel_cpu(cpu, chan) {
- struct lttng_ust_lib_ring_buffer *buf = shmp(handle, chan->backend.buf[cpu].shmp);
-
- if (config->cb.buffer_finalize)
- config->cb.buffer_finalize(buf,
- chan->backend.priv,
- cpu, handle);
- if (buf->backend.allocated)
- lib_ring_buffer_switch_slow(buf, SWITCH_FLUSH,
- handle);
- /*
- * Perform flush before writing to finalized.
- */
- cmm_smp_wmb();
- CMM_ACCESS_ONCE(buf->finalized) = 1;
- //wake_up_interruptible(&buf->read_wait);
- }
- } else {
- struct lttng_ust_lib_ring_buffer *buf = shmp(handle, chan->backend.buf[0].shmp);
+ /*
+ * Note: the consumer takes care of finalizing and switching the
+ * buffers.
+ */
- if (config->cb.buffer_finalize)
- config->cb.buffer_finalize(buf, chan->backend.priv, -1, handle);
- if (buf->backend.allocated)
- lib_ring_buffer_switch_slow(buf, SWITCH_FLUSH,
- handle);
- /*
- * Perform flush before writing to finalized.
- */
- cmm_smp_wmb();
- CMM_ACCESS_ONCE(buf->finalized) = 1;
- //wake_up_interruptible(&buf->read_wait);
- }
- CMM_ACCESS_ONCE(chan->finalized) = 1;
- //wake_up_interruptible(&chan->hp_wait);
- //wake_up_interruptible(&chan->read_wait);
/*
* sessiond/consumer are keeping a reference on the shm file
* descriptor directly. No need to refcount.
*/
- priv = chan->backend.priv;
channel_release(chan, handle, shadow);
- return priv;
+ return;
}
struct lttng_ust_lib_ring_buffer *channel_get_ring_buffer(
const struct lttng_ust_lib_ring_buffer_config *config,
struct channel *chan, int cpu,
struct lttng_ust_shm_handle *handle,
- int *shm_fd, int *wait_fd,
- uint64_t *memory_map_size)
+ int **shm_fd, int **wait_fd,
+ uint64_t **memory_map_size)
{
struct shm_ref *ref;
*/
if (((commit_count - chan->backend.subbuf_size)
& chan->commit_count_mask)
- - (buf_trunc(consumed_cur, chan)
+ - (buf_trunc(consumed, chan)
>> chan->backend.num_subbuf_order)
!= 0)
goto nodata;
* Check that we are not about to read the same subbuffer in
* which the writer head is.
*/
- if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed_cur, chan)
+ if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed, chan)
== 0)
goto nodata;
commit_count_sb = v_read(config, &shmp_index(handle, buf->commit_cold, cons_idx)->cc_sb);
if (subbuf_offset(commit_count, chan) != 0)
- ERRMSG("ring buffer %s, cpu %d: "
+ DBG("ring buffer %s, cpu %d: "
"commit count in subbuffer %lu,\n"
"expecting multiples of %lu bytes\n"
" [ %lu bytes committed, %lu bytes reader-visible ]\n",
chan->backend.subbuf_size,
commit_count, commit_count_sb);
- ERRMSG("ring buffer: %s, cpu %d: %lu bytes committed\n",
+ DBG("ring buffer: %s, cpu %d: %lu bytes committed\n",
chan->backend.name, cpu, commit_count);
}
const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
unsigned long write_offset, cons_offset;
- /*
- * Can be called in the error path of allocation when
- * trans_channel_data is not yet set.
- */
- if (!chan)
- return;
/*
* No need to order commit_count, write_offset and cons_offset reads
* because we execute at teardown when no more writer nor reader
write_offset = v_read(config, &buf->offset);
cons_offset = uatomic_read(&buf->consumed);
if (write_offset != cons_offset)
- ERRMSG("ring buffer %s, cpu %d: "
+ DBG("ring buffer %s, cpu %d: "
"non-consumed data\n"
" [ %lu bytes written, %lu bytes read ]\n",
chan->backend.name, cpu, write_offset, cons_offset);
struct lttng_ust_shm_handle *handle)
{
const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
- void *priv = chan->backend.priv;
-
- ERRMSG("ring buffer %s, cpu %d: %lu records written, "
- "%lu records overrun\n",
- chan->backend.name, cpu,
- v_read(config, &buf->records_count),
- v_read(config, &buf->records_overrun));
-
- if (v_read(config, &buf->records_lost_full)
- || v_read(config, &buf->records_lost_wrap)
- || v_read(config, &buf->records_lost_big))
- ERRMSG("ring buffer %s, cpu %d: records were lost. Caused by:\n"
- " [ %lu buffer full, %lu nest buffer wrap-around, "
- "%lu event too big ]\n",
- chan->backend.name, cpu,
- v_read(config, &buf->records_lost_full),
- v_read(config, &buf->records_lost_wrap),
- v_read(config, &buf->records_lost_big));
-
+ void *priv = channel_get_private(chan);
+
+ if (!strcmp(chan->backend.name, "relay-metadata-mmap")) {
+ DBG("ring buffer %s: %lu records written, "
+ "%lu records overrun\n",
+ chan->backend.name,
+ v_read(config, &buf->records_count),
+ v_read(config, &buf->records_overrun));
+ } else {
+ DBG("ring buffer %s, cpu %d: %lu records written, "
+ "%lu records overrun\n",
+ chan->backend.name, cpu,
+ v_read(config, &buf->records_count),
+ v_read(config, &buf->records_overrun));
+
+ if (v_read(config, &buf->records_lost_full)
+ || v_read(config, &buf->records_lost_wrap)
+ || v_read(config, &buf->records_lost_big))
+ DBG("ring buffer %s, cpu %d: records were lost. Caused by:\n"
+ " [ %lu buffer full, %lu nest buffer wrap-around, "
+ "%lu event too big ]\n",
+ chan->backend.name, cpu,
+ v_read(config, &buf->records_lost_full),
+ v_read(config, &buf->records_lost_wrap),
+ v_read(config, &buf->records_lost_big));
+ }
lib_ring_buffer_print_buffer_errors(buf, chan, priv, cpu, handle);
}
void lib_ring_buffer_switch_old_start(struct lttng_ust_lib_ring_buffer *buf,
struct channel *chan,
struct switch_offsets *offsets,
- u64 tsc,
+ uint64_t tsc,
struct lttng_ust_shm_handle *handle)
{
const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
void lib_ring_buffer_switch_old_end(struct lttng_ust_lib_ring_buffer *buf,
struct channel *chan,
struct switch_offsets *offsets,
- u64 tsc,
+ uint64_t tsc,
struct lttng_ust_shm_handle *handle)
{
const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
void lib_ring_buffer_switch_new_start(struct lttng_ust_lib_ring_buffer *buf,
struct channel *chan,
struct switch_offsets *offsets,
- u64 tsc,
+ uint64_t tsc,
struct lttng_ust_shm_handle *handle)
{
const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
void lib_ring_buffer_switch_new_end(struct lttng_ust_lib_ring_buffer *buf,
struct channel *chan,
struct switch_offsets *offsets,
- u64 tsc,
+ uint64_t tsc,
struct lttng_ust_shm_handle *handle)
{
const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
struct lttng_ust_lib_ring_buffer *buf,
struct channel *chan,
struct switch_offsets *offsets,
- u64 *tsc)
+ uint64_t *tsc,
+ struct lttng_ust_shm_handle *handle)
{
const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
- unsigned long off;
+ unsigned long off, reserve_commit_diff;
offsets->begin = v_read(config, &buf->offset);
offsets->old = offsets->begin;
* timestamps) are visible to the reader. This is required for
* quiescence guarantees for the fusion merge.
*/
- if (mode == SWITCH_FLUSH || off > 0) {
- if (caa_unlikely(off == 0)) {
- /*
- * The client does not save any header information.
- * Don't switch empty subbuffer on finalize, because it
- * is invalid to deliver a completely empty subbuffer.
- */
- if (!config->cb.subbuffer_header_size())
+ if (mode != SWITCH_FLUSH && !off)
+ return -1; /* we do not have to switch : buffer is empty */
+
+ if (caa_unlikely(off == 0)) {
+ unsigned long sb_index, commit_count;
+
+ /*
+ * We are performing a SWITCH_FLUSH. At this stage, there are no
+ * concurrent writes into the buffer.
+ *
+ * The client does not save any header information. Don't
+ * switch empty subbuffer on finalize, because it is invalid to
+ * deliver a completely empty subbuffer.
+ */
+ if (!config->cb.subbuffer_header_size())
+ return -1;
+
+ /* Test new buffer integrity */
+ sb_index = subbuf_index(offsets->begin, chan);
+ commit_count = v_read(config,
+ &shmp_index(handle, buf->commit_cold,
+ sb_index)->cc_sb);
+ reserve_commit_diff =
+ (buf_trunc(offsets->begin, chan)
+ >> chan->backend.num_subbuf_order)
+ - (commit_count & chan->commit_count_mask);
+ if (caa_likely(reserve_commit_diff == 0)) {
+ /* Next subbuffer not being written to. */
+ if (caa_unlikely(config->mode != RING_BUFFER_OVERWRITE &&
+ subbuf_trunc(offsets->begin, chan)
+ - subbuf_trunc((unsigned long)
+ uatomic_read(&buf->consumed), chan)
+ >= chan->backend.buf_size)) {
+ /*
+ * We do not overwrite non consumed buffers
+ * and we are full : don't switch.
+ */
return -1;
+ } else {
+ /*
+ * Next subbuffer not being written to, and we
+ * are either in overwrite mode or the buffer is
+ * not full. It's safe to write in this new
+ * subbuffer.
+ */
+ }
+ } else {
/*
- * Need to write the subbuffer start header on finalize.
+ * Next subbuffer reserve offset does not match the
+ * commit offset. Don't perform switch in
+ * producer-consumer and overwrite mode. Caused by
+ * either a writer OOPS or too many nested writes over a
+ * reserve/commit pair.
*/
- offsets->switch_old_start = 1;
+ return -1;
}
- offsets->begin = subbuf_align(offsets->begin, chan);
- } else
- return -1; /* we do not have to switch : buffer is empty */
+
+ /*
+ * Need to write the subbuffer start header on finalize.
+ */
+ offsets->switch_old_start = 1;
+ }
+ offsets->begin = subbuf_align(offsets->begin, chan);
/* Note: old points to the next subbuf at offset 0 */
offsets->end = offsets->begin;
return 0;
const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
struct switch_offsets offsets;
unsigned long oldidx;
- u64 tsc;
+ uint64_t tsc;
offsets.size = 0;
*/
do {
if (lib_ring_buffer_try_switch_slow(mode, buf, chan, &offsets,
- &tsc))
+ &tsc, handle))
return; /* Switch not needed */
} while (v_cmpxchg(config, &buf->offset, offsets.old, offsets.end)
!= offsets.old);
{
const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config;
struct lttng_ust_shm_handle *handle = ctx->handle;
- unsigned long reserve_commit_diff;
+ unsigned long reserve_commit_diff, offset_cmp;
- offsets->begin = v_read(config, &buf->offset);
+retry:
+ offsets->begin = offset_cmp = v_read(config, &buf->offset);
offsets->old = offsets->begin;
offsets->switch_new_start = 0;
offsets->switch_new_end = 0;
}
}
if (caa_unlikely(offsets->switch_new_start)) {
- unsigned long sb_index;
+ unsigned long sb_index, commit_count;
/*
* We are typically not filling the previous buffer completely.
+ config->cb.subbuffer_header_size();
/* Test new buffer integrity */
sb_index = subbuf_index(offsets->begin, chan);
+ /*
+ * Read buf->offset before buf->commit_cold[sb_index].cc_sb.
+ * lib_ring_buffer_check_deliver() has the matching
+ * memory barriers required around commit_cold cc_sb
+ * updates to ensure reserve and commit counter updates
+ * are not seen reordered when updated by another CPU.
+ */
+ cmm_smp_rmb();
+ commit_count = v_read(config,
+ &shmp_index(handle, buf->commit_cold,
+ sb_index)->cc_sb);
+ /* Read buf->commit_cold[sb_index].cc_sb before buf->offset. */
+ cmm_smp_rmb();
+ if (caa_unlikely(offset_cmp != v_read(config, &buf->offset))) {
+ /*
+ * The reserve counter have been concurrently updated
+ * while we read the commit counter. This means the
+ * commit counter we read might not match buf->offset
+ * due to concurrent update. We therefore need to retry.
+ */
+ goto retry;
+ }
reserve_commit_diff =
(buf_trunc(offsets->begin, chan)
>> chan->backend.num_subbuf_order)
- - ((unsigned long) v_read(config,
- &shmp_index(handle, buf->commit_cold, sb_index)->cc_sb)
- & chan->commit_count_mask);
+ - (commit_count & chan->commit_count_mask);
if (caa_likely(reserve_commit_diff == 0)) {
/* Next subbuffer not being written to. */
if (caa_unlikely(config->mode != RING_BUFFER_OVERWRITE &&
- subbuf_trunc((unsigned long)
uatomic_read(&buf->consumed), chan)
>= chan->backend.buf_size)) {
+ unsigned long nr_lost;
+
/*
* We do not overwrite non consumed buffers
* and we are full : record is lost.
*/
+ nr_lost = v_read(config, &buf->records_lost_full);
v_inc(config, &buf->records_lost_full);
+ if ((nr_lost & (DBG_PRINT_NR_LOST - 1)) == 0) {
+ DBG("%lu or more records lost in (%s:%d) (buffer full)\n",
+ nr_lost + 1, chan->backend.name,
+ buf->backend.cpu);
+ }
return -ENOBUFS;
} else {
/*
*/
}
} else {
+ unsigned long nr_lost;
+
/*
* Next subbuffer reserve offset does not match the
- * commit offset. Drop record in producer-consumer and
+ * commit offset, and this did not involve update to the
+ * reserve counter. Drop record in producer-consumer and
* overwrite mode. Caused by either a writer OOPS or too
* many nested writes over a reserve/commit pair.
*/
+ nr_lost = v_read(config, &buf->records_lost_wrap);
v_inc(config, &buf->records_lost_wrap);
+ if ((nr_lost & (DBG_PRINT_NR_LOST - 1)) == 0) {
+ DBG("%lu or more records lost in (%s:%d) (wrap-around)\n",
+ nr_lost + 1, chan->backend.name,
+ buf->backend.cpu);
+ }
return -EIO;
}
offsets->size =
+ ctx->data_size;
if (caa_unlikely(subbuf_offset(offsets->begin, chan)
+ offsets->size > chan->backend.subbuf_size)) {
+ unsigned long nr_lost;
+
/*
* Record too big for subbuffers, report error, don't
* complete the sub-buffer switch.
*/
+ nr_lost = v_read(config, &buf->records_lost_big);
v_inc(config, &buf->records_lost_big);
+ if ((nr_lost & (DBG_PRINT_NR_LOST - 1)) == 0) {
+ DBG("%lu or more records lost in (%s:%d) record size "
+ " of %zu bytes is too large for buffer\n",
+ nr_lost + 1, chan->backend.name,
+ buf->backend.cpu, offsets->size);
+ }
return -ENOSPC;
} else {
/*
ctx->buf_offset = offsets.begin + offsets.pre_header_padding;
return 0;
}
+
+/*
+ * Force a read (imply TLS fixup for dlopen) of TLS variables.
+ */
+void lttng_fixup_ringbuffer_tls(void)
+{
+ asm volatile ("" : : "m" (lib_ring_buffer_nesting));
+}