X-Git-Url: http://git.lttng.org/?a=blobdiff_plain;f=lib%2Fringbuffer%2Fring_buffer_frontend.c;h=5d4be3b3dcf4ddc60920377c3d1f160ceb629261;hb=fbe84fd748b6fde1f773da8e44d61235f8d4c232;hp=c4b797ce8af76faf9ba5209d054b89fb42326656;hpb=e6b06d7d6346a5e3cfcdd214d171af067bca0f34;p=lttng-modules.git

diff --git a/lib/ringbuffer/ring_buffer_frontend.c b/lib/ringbuffer/ring_buffer_frontend.c
index c4b797ce..5d4be3b3 100644
--- a/lib/ringbuffer/ring_buffer_frontend.c
+++ b/lib/ringbuffer/ring_buffer_frontend.c
@@ -55,13 +55,15 @@
 #include
 #include
-#include "../../wrapper/ringbuffer/config.h"
-#include "../../wrapper/ringbuffer/backend.h"
-#include "../../wrapper/ringbuffer/frontend.h"
-#include "../../wrapper/ringbuffer/iterator.h"
-#include "../../wrapper/ringbuffer/nohz.h"
-#include "../../wrapper/atomic.h"
-#include "../../wrapper/percpu-defs.h"
+#include <wrapper/ringbuffer/config.h>
+#include <wrapper/ringbuffer/backend.h>
+#include <wrapper/ringbuffer/frontend.h>
+#include <wrapper/ringbuffer/iterator.h>
+#include <wrapper/ringbuffer/nohz.h>
+#include <wrapper/atomic.h>
+#include <wrapper/kref.h>
+#include <wrapper/percpu-defs.h>
+#include <wrapper/timer.h>
 
 /*
  * Internal structure representing offsets to use at a sub-buffer switch.
@@ -91,6 +93,50 @@ EXPORT_PER_CPU_SYMBOL(lib_ring_buffer_nesting);
 static
 void lib_ring_buffer_print_errors(struct channel *chan,
 				  struct lib_ring_buffer *buf, int cpu);
+static
+void _lib_ring_buffer_switch_remote(struct lib_ring_buffer *buf,
+		enum switch_mode mode);
+
+static
+int lib_ring_buffer_poll_deliver(const struct lib_ring_buffer_config *config,
+				 struct lib_ring_buffer *buf,
+				 struct channel *chan)
+{
+	unsigned long consumed_old, consumed_idx, commit_count, write_offset;
+
+	consumed_old = atomic_long_read(&buf->consumed);
+	consumed_idx = subbuf_index(consumed_old, chan);
+	commit_count = v_read(config, &buf->commit_cold[consumed_idx].cc_sb);
+	/*
+	 * No memory barrier here, since we are only interested
+	 * in a statistically correct polling result. The next poll will
+	 * get the data if we are racing. The mb() that ensures correct
+	 * memory order is in get_subbuf.
+	 */
+	write_offset = v_read(config, &buf->offset);
+
+	/*
+	 * Check that the subbuffer we are trying to consume has already
+	 * been fully committed.
+	 */
+
+	if (((commit_count - chan->backend.subbuf_size)
+	     & chan->commit_count_mask)
+	    - (buf_trunc(consumed_old, chan)
+	       >> chan->backend.num_subbuf_order)
+	    != 0)
+		return 0;
+
+	/*
+	 * Check that we are not about to read the same subbuffer in
+	 * which the writer head is.
+	 */
+	if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed_old, chan)
+	    == 0)
+		return 0;
+
+	return 1;
+}
 
 /*
  * Must be called under cpu hotplug protection.
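The "fully committed" test in lib_ring_buffer_poll_deliver() above packs a fair amount of arithmetic into one expression. The standalone sketch below (not part of the patch; the channel geometry and helper names are hypothetical, and counter wrap is ignored) mirrors that test in plain C to show why comparing cc_sb - subbuf_size against buf_trunc(consumed) >> num_subbuf_order identifies a sub-buffer that has been completely filled for the pass the consumer is looking at.

/*
 * Standalone sketch (hypothetical geometry: 4 sub-buffers of 4 KiB) of the
 * commit-count test used by lib_ring_buffer_poll_deliver() above.
 */
#include <assert.h>
#include <stdio.h>

#define SUBBUF_SIZE		4096UL	/* hypothetical */
#define NUM_SUBBUF		4UL	/* hypothetical */
#define NUM_SUBBUF_ORDER	2UL	/* log2(NUM_SUBBUF) */
#define BUF_SIZE		(SUBBUF_SIZE * NUM_SUBBUF)
#define COMMIT_COUNT_MASK	(~0UL)	/* counter wrap ignored in this sketch */

/* Round an offset down to the start of its pass over the whole buffer. */
static unsigned long buf_trunc(unsigned long offset)
{
	return offset & ~(BUF_SIZE - 1);
}

/* Same test as in lib_ring_buffer_poll_deliver(). */
static int subbuf_fully_committed(unsigned long cc_sb, unsigned long consumed)
{
	return (((cc_sb - SUBBUF_SIZE) & COMMIT_COUNT_MASK)
		- (buf_trunc(consumed) >> NUM_SUBBUF_ORDER)) == 0;
}

int main(void)
{
	/*
	 * consumed = 20480: sub-buffer 1, second pass over the buffer.
	 * The writer raises cc_sb to 2 * SUBBUF_SIZE once that sub-buffer
	 * is completely filled for this pass.
	 */
	assert(subbuf_fully_committed(2 * SUBBUF_SIZE, 20480UL));
	/* cc_sb still at the first pass: not yet deliverable. */
	assert(!subbuf_fully_committed(1 * SUBBUF_SIZE, 20480UL));
	printf("commit-count check behaves as expected\n");
	return 0;
}

The real code additionally applies chan->commit_count_mask so the comparison stays valid once the unsigned long commit counters wrap.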
@@ -201,7 +247,8 @@ int lib_ring_buffer_create(struct lib_ring_buffer *buf, kzalloc_node(ALIGN(sizeof(*buf->commit_hot) * chan->backend.num_subbuf, 1 << INTERNODE_CACHE_SHIFT), - GFP_KERNEL, cpu_to_node(max(cpu, 0))); + GFP_KERNEL | __GFP_NOWARN, + cpu_to_node(max(cpu, 0))); if (!buf->commit_hot) { ret = -ENOMEM; goto free_chanbuf; @@ -211,7 +258,8 @@ int lib_ring_buffer_create(struct lib_ring_buffer *buf, kzalloc_node(ALIGN(sizeof(*buf->commit_cold) * chan->backend.num_subbuf, 1 << INTERNODE_CACHE_SHIFT), - GFP_KERNEL, cpu_to_node(max(cpu, 0))); + GFP_KERNEL | __GFP_NOWARN, + cpu_to_node(max(cpu, 0))); if (!buf->commit_cold) { ret = -ENOMEM; goto free_commit; @@ -277,7 +325,7 @@ static void switch_buffer_timer(unsigned long data) lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE); if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) - mod_timer_pinned(&buf->switch_timer, + lttng_mod_timer_pinned(&buf->switch_timer, jiffies + chan->switch_timer_interval); else mod_timer(&buf->switch_timer, @@ -294,7 +342,12 @@ static void lib_ring_buffer_start_switch_timer(struct lib_ring_buffer *buf) if (!chan->switch_timer_interval || buf->switch_timer_enabled) return; - init_timer(&buf->switch_timer); + + if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) + lttng_init_timer_pinned(&buf->switch_timer); + else + init_timer(&buf->switch_timer); + buf->switch_timer.function = switch_buffer_timer; buf->switch_timer.expires = jiffies + chan->switch_timer_interval; buf->switch_timer.data = (unsigned long)buf; @@ -337,7 +390,7 @@ static void read_buffer_timer(unsigned long data) } if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) - mod_timer_pinned(&buf->read_timer, + lttng_mod_timer_pinned(&buf->read_timer, jiffies + chan->read_timer_interval); else mod_timer(&buf->read_timer, @@ -357,7 +410,11 @@ static void lib_ring_buffer_start_read_timer(struct lib_ring_buffer *buf) || buf->read_timer_enabled) return; - init_timer(&buf->read_timer); + if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) + lttng_init_timer_pinned(&buf->read_timer); + else + init_timer(&buf->read_timer); + buf->read_timer.function = read_buffer_timer; buf->read_timer.expires = jiffies + chan->read_timer_interval; buf->read_timer.data = (unsigned long)buf; @@ -585,6 +642,63 @@ static void channel_unregister_notifiers(struct channel *chan) channel_backend_unregister_notifiers(&chan->backend); } +static void lib_ring_buffer_set_quiescent(struct lib_ring_buffer *buf) +{ + if (!buf->quiescent) { + buf->quiescent = true; + _lib_ring_buffer_switch_remote(buf, SWITCH_FLUSH); + } +} + +static void lib_ring_buffer_clear_quiescent(struct lib_ring_buffer *buf) +{ + buf->quiescent = false; +} + +void lib_ring_buffer_set_quiescent_channel(struct channel *chan) +{ + int cpu; + const struct lib_ring_buffer_config *config = &chan->backend.config; + + if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) { + get_online_cpus(); + for_each_channel_cpu(cpu, chan) { + struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf, + cpu); + + lib_ring_buffer_set_quiescent(buf); + } + put_online_cpus(); + } else { + struct lib_ring_buffer *buf = chan->backend.buf; + + lib_ring_buffer_set_quiescent(buf); + } +} +EXPORT_SYMBOL_GPL(lib_ring_buffer_set_quiescent_channel); + +void lib_ring_buffer_clear_quiescent_channel(struct channel *chan) +{ + int cpu; + const struct lib_ring_buffer_config *config = &chan->backend.config; + + if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) { + get_online_cpus(); + for_each_channel_cpu(cpu, chan) { + struct lib_ring_buffer *buf = per_cpu_ptr(chan->backend.buf, 
+ cpu); + + lib_ring_buffer_clear_quiescent(buf); + } + put_online_cpus(); + } else { + struct lib_ring_buffer *buf = chan->backend.buf; + + lib_ring_buffer_clear_quiescent(buf); + } +} +EXPORT_SYMBOL_GPL(lib_ring_buffer_clear_quiescent_channel); + static void channel_free(struct channel *chan) { if (chan->backend.release_priv_ops) { @@ -745,7 +859,7 @@ void *channel_destroy(struct channel *chan) chan->backend.priv, cpu); if (buf->backend.allocated) - lib_ring_buffer_switch_slow(buf, SWITCH_FLUSH); + lib_ring_buffer_set_quiescent(buf); /* * Perform flush before writing to finalized. */ @@ -759,7 +873,7 @@ void *channel_destroy(struct channel *chan) if (config->cb.buffer_finalize) config->cb.buffer_finalize(buf, chan->backend.priv, -1); if (buf->backend.allocated) - lib_ring_buffer_switch_slow(buf, SWITCH_FLUSH); + lib_ring_buffer_set_quiescent(buf); /* * Perform flush before writing to finalized. */ @@ -793,7 +907,10 @@ int lib_ring_buffer_open_read(struct lib_ring_buffer *buf) if (!atomic_long_add_unless(&buf->active_readers, 1, 1)) return -EBUSY; - kref_get(&chan->ref); + if (!lttng_kref_get(&chan->ref)) { + atomic_long_dec(&buf->active_readers); + return -EOVERFLOW; + } lttng_smp_mb__after_atomic(); return 0; } @@ -1229,7 +1346,8 @@ void lib_ring_buffer_print_errors(struct channel *chan, /* * lib_ring_buffer_switch_old_start: Populate old subbuffer header. * - * Only executed when the buffer is finalized, in SWITCH_FLUSH. + * Only executed by SWITCH_FLUSH, which can be issued while tracing is active + * or at buffer finalization (destroy). */ static void lib_ring_buffer_switch_old_start(struct lib_ring_buffer *buf, @@ -1240,6 +1358,7 @@ void lib_ring_buffer_switch_old_start(struct lib_ring_buffer *buf, const struct lib_ring_buffer_config *config = &chan->backend.config; unsigned long oldidx = subbuf_index(offsets->old, chan); unsigned long commit_count; + struct commit_counters_hot *cc_hot; config->cb.buffer_begin(buf, tsc, oldidx); @@ -1256,15 +1375,15 @@ void lib_ring_buffer_switch_old_start(struct lib_ring_buffer *buf, barrier(); } else smp_wmb(); - v_add(config, config->cb.subbuffer_header_size(), - &buf->commit_hot[oldidx].cc); - commit_count = v_read(config, &buf->commit_hot[oldidx].cc); + cc_hot = &buf->commit_hot[oldidx]; + v_add(config, config->cb.subbuffer_header_size(), &cc_hot->cc); + commit_count = v_read(config, &cc_hot->cc); /* Check if the written buffer has to be delivered */ lib_ring_buffer_check_deliver(config, buf, chan, offsets->old, commit_count, oldidx, tsc); - lib_ring_buffer_write_commit_counter(config, buf, chan, oldidx, + lib_ring_buffer_write_commit_counter(config, buf, chan, offsets->old + config->cb.subbuffer_header_size(), - commit_count); + commit_count, cc_hot); } /* @@ -1284,6 +1403,7 @@ void lib_ring_buffer_switch_old_end(struct lib_ring_buffer *buf, const struct lib_ring_buffer_config *config = &chan->backend.config; unsigned long oldidx = subbuf_index(offsets->old - 1, chan); unsigned long commit_count, padding_size, data_size; + struct commit_counters_hot *cc_hot; data_size = subbuf_offset(offsets->old - 1, chan) + 1; padding_size = chan->backend.subbuf_size - data_size; @@ -1302,12 +1422,14 @@ void lib_ring_buffer_switch_old_end(struct lib_ring_buffer *buf, barrier(); } else smp_wmb(); - v_add(config, padding_size, &buf->commit_hot[oldidx].cc); - commit_count = v_read(config, &buf->commit_hot[oldidx].cc); + cc_hot = &buf->commit_hot[oldidx]; + v_add(config, padding_size, &cc_hot->cc); + commit_count = v_read(config, &cc_hot->cc); 
lib_ring_buffer_check_deliver(config, buf, chan, offsets->old - 1, commit_count, oldidx, tsc); - lib_ring_buffer_write_commit_counter(config, buf, chan, oldidx, - offsets->old + padding_size, commit_count); + lib_ring_buffer_write_commit_counter(config, buf, chan, + offsets->old + padding_size, commit_count, + cc_hot); } /* @@ -1326,6 +1448,7 @@ void lib_ring_buffer_switch_new_start(struct lib_ring_buffer *buf, const struct lib_ring_buffer_config *config = &chan->backend.config; unsigned long beginidx = subbuf_index(offsets->begin, chan); unsigned long commit_count; + struct commit_counters_hot *cc_hot; config->cb.buffer_begin(buf, tsc, beginidx); @@ -1342,15 +1465,15 @@ void lib_ring_buffer_switch_new_start(struct lib_ring_buffer *buf, barrier(); } else smp_wmb(); - v_add(config, config->cb.subbuffer_header_size(), - &buf->commit_hot[beginidx].cc); - commit_count = v_read(config, &buf->commit_hot[beginidx].cc); + cc_hot = &buf->commit_hot[beginidx]; + v_add(config, config->cb.subbuffer_header_size(), &cc_hot->cc); + commit_count = v_read(config, &cc_hot->cc); /* Check if the written buffer has to be delivered */ lib_ring_buffer_check_deliver(config, buf, chan, offsets->begin, commit_count, beginidx, tsc); - lib_ring_buffer_write_commit_counter(config, buf, chan, beginidx, + lib_ring_buffer_write_commit_counter(config, buf, chan, offsets->begin + config->cb.subbuffer_header_size(), - commit_count); + commit_count, cc_hot); } /* @@ -1420,12 +1543,14 @@ int lib_ring_buffer_try_switch_slow(enum switch_mode mode, unsigned long sb_index, commit_count; /* - * We are performing a SWITCH_FLUSH. At this stage, there are no - * concurrent writes into the buffer. + * We are performing a SWITCH_FLUSH. There may be concurrent + * writes into the buffer if e.g. invoked while performing a + * snapshot on an active trace. * - * The client does not save any header information. Don't - * switch empty subbuffer on finalize, because it is invalid to - * deliver a completely empty subbuffer. + * If the client does not save any header information (sub-buffer + * header size == 0), don't switch empty subbuffer on finalize, + * because it is invalid to deliver a completely empty + * subbuffer. */ if (!config->cb.subbuffer_header_size()) return -1; @@ -1539,24 +1664,32 @@ void lib_ring_buffer_switch_slow(struct lib_ring_buffer *buf, enum switch_mode m } EXPORT_SYMBOL_GPL(lib_ring_buffer_switch_slow); +struct switch_param { + struct lib_ring_buffer *buf; + enum switch_mode mode; +}; + static void remote_switch(void *info) { - struct lib_ring_buffer *buf = info; + struct switch_param *param = info; + struct lib_ring_buffer *buf = param->buf; - lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE); + lib_ring_buffer_switch_slow(buf, param->mode); } -void lib_ring_buffer_switch_remote(struct lib_ring_buffer *buf) +static void _lib_ring_buffer_switch_remote(struct lib_ring_buffer *buf, + enum switch_mode mode) { struct channel *chan = buf->backend.chan; const struct lib_ring_buffer_config *config = &chan->backend.config; int ret; + struct switch_param param; /* * With global synchronization we don't need to use the IPI scheme. */ if (config->sync == RING_BUFFER_SYNC_GLOBAL) { - lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE); + lib_ring_buffer_switch_slow(buf, mode); return; } @@ -1571,16 +1704,30 @@ void lib_ring_buffer_switch_remote(struct lib_ring_buffer *buf) * switch. 
 	 */
 	get_online_cpus();
+	param.buf = buf;
+	param.mode = mode;
 	ret = smp_call_function_single(buf->backend.cpu,
-				 remote_switch, buf, 1);
+				 remote_switch, &param, 1);
 	if (ret) {
 		/* Remote CPU is offline, do it ourselves. */
-		lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE);
+		lib_ring_buffer_switch_slow(buf, mode);
 	}
 	put_online_cpus();
 }
+
+void lib_ring_buffer_switch_remote(struct lib_ring_buffer *buf)
+{
+	_lib_ring_buffer_switch_remote(buf, SWITCH_ACTIVE);
+}
 EXPORT_SYMBOL_GPL(lib_ring_buffer_switch_remote);
 
+/* Switch sub-buffer even if current sub-buffer is empty. */
+void lib_ring_buffer_switch_remote_empty(struct lib_ring_buffer *buf)
+{
+	_lib_ring_buffer_switch_remote(buf, SWITCH_FLUSH);
+}
+EXPORT_SYMBOL_GPL(lib_ring_buffer_switch_remote_empty);
+
 /*
  * Returns :
  * 0 if ok
@@ -1739,6 +1886,25 @@ retry:
 	return 0;
 }
 
+static struct lib_ring_buffer *get_current_buf(struct channel *chan, int cpu)
+{
+	const struct lib_ring_buffer_config *config = &chan->backend.config;
+
+	if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
+		return per_cpu_ptr(chan->backend.buf, cpu);
+	else
+		return chan->backend.buf;
+}
+
+void lib_ring_buffer_lost_event_too_big(struct channel *chan)
+{
+	const struct lib_ring_buffer_config *config = &chan->backend.config;
+	struct lib_ring_buffer *buf = get_current_buf(chan, smp_processor_id());
+
+	v_inc(config, &buf->records_lost_big);
+}
+EXPORT_SYMBOL_GPL(lib_ring_buffer_lost_event_too_big);
+
 /**
  * lib_ring_buffer_reserve_slow - Atomic slot reservation in a buffer.
  * @ctx: ring buffer context.
@@ -1755,12 +1921,7 @@ int lib_ring_buffer_reserve_slow(struct lib_ring_buffer_ctx *ctx)
 	struct switch_offsets offsets;
 	int ret;
 
-	if (config->alloc == RING_BUFFER_ALLOC_PER_CPU)
-		buf = per_cpu_ptr(chan->backend.buf, ctx->cpu);
-	else
-		buf = chan->backend.buf;
-	ctx->buf = buf;
-
+	ctx->buf = buf = get_current_buf(chan, ctx->cpu);
 	offsets.size = 0;
 
 	do {
@@ -1816,6 +1977,147 @@ int lib_ring_buffer_reserve_slow(struct lib_ring_buffer_ctx *ctx)
 }
 EXPORT_SYMBOL_GPL(lib_ring_buffer_reserve_slow);
 
+static
+void lib_ring_buffer_vmcore_check_deliver(const struct lib_ring_buffer_config *config,
+					  struct lib_ring_buffer *buf,
+					  unsigned long commit_count,
+					  unsigned long idx)
+{
+	if (config->oops == RING_BUFFER_OOPS_CONSISTENCY)
+		v_set(config, &buf->commit_hot[idx].seq, commit_count);
+}
+
+/*
+ * The ring buffer can count events recorded and overwritten per buffer,
+ * but it is disabled by default due to its performance overhead.
+ */
+#ifdef LTTNG_RING_BUFFER_COUNT_EVENTS
+static
+void deliver_count_events(const struct lib_ring_buffer_config *config,
+		struct lib_ring_buffer *buf,
+		unsigned long idx)
+{
+	v_add(config, subbuffer_get_records_count(config,
+			&buf->backend, idx),
+		&buf->records_count);
+	v_add(config, subbuffer_count_records_overrun(config,
+			&buf->backend, idx),
+		&buf->records_overrun);
+}
+#else /* LTTNG_RING_BUFFER_COUNT_EVENTS */
+static
+void deliver_count_events(const struct lib_ring_buffer_config *config,
+		struct lib_ring_buffer *buf,
+		unsigned long idx)
+{
+}
+#endif /* #else LTTNG_RING_BUFFER_COUNT_EVENTS */
+
+
+void lib_ring_buffer_check_deliver_slow(const struct lib_ring_buffer_config *config,
+				   struct lib_ring_buffer *buf,
+				   struct channel *chan,
+				   unsigned long offset,
+				   unsigned long commit_count,
+				   unsigned long idx,
+				   u64 tsc)
+{
+	unsigned long old_commit_count = commit_count
+					 - chan->backend.subbuf_size;
+
+	/*
+	 * If we succeeded at updating cc_sb below, we are the subbuffer
+	 * writer delivering the subbuffer. Deals with concurrent
+	 * updates of the "cc" value without adding an add_return atomic
+	 * operation to the fast path.
+	 *
+	 * We are doing the delivery in two steps:
+	 * - First, we cmpxchg() cc_sb to the new value
+	 *   old_commit_count + 1. This ensures that we are the only
+	 *   subbuffer user successfully filling the subbuffer, but we
+	 *   do _not_ set the cc_sb value to "commit_count" yet.
+	 *   Therefore, other writers that would wrap around the ring
+	 *   buffer and try to start writing to our subbuffer would
+	 *   have to drop records, because it would appear as
+	 *   non-filled.
+	 *   We therefore have exclusive access to the subbuffer control
+	 *   structures. This mutual exclusion with other writers is
+	 *   crucially important for counting record overruns locklessly
+	 *   in flight recorder mode.
+	 * - When we are ready to release the subbuffer (either for
+	 *   reading or for overrun by other writers), we simply set the
+	 *   cc_sb value to "commit_count" and perform delivery.
+	 *
+	 * The subbuffer size is at least 2 bytes (minimum size: 1 page).
+	 * This guarantees that old_commit_count + 1 != commit_count.
+	 */
+
+	/*
+	 * Order prior updates to the reserve count before the
+	 * commit_cold cc_sb update.
+	 */
+	smp_wmb();
+	if (likely(v_cmpxchg(config, &buf->commit_cold[idx].cc_sb,
+				old_commit_count, old_commit_count + 1)
+		   == old_commit_count)) {
+		/*
+		 * Start of exclusive subbuffer access. We are
+		 * guaranteed to be the last writer in this subbuffer
+		 * and any other writer trying to access this subbuffer
+		 * in this state is required to drop records.
+		 */
+		deliver_count_events(config, buf, idx);
+		config->cb.buffer_end(buf, tsc, idx,
+				      lib_ring_buffer_get_data_size(config,
+								buf,
+								idx));
+
+		/*
+		 * Increment the packet counter while we have exclusive
+		 * access.
+		 */
+		subbuffer_inc_packet_count(config, &buf->backend, idx);
+
+		/*
+		 * Set noref flag and offset for this subbuffer id.
+		 * Contains a memory barrier that ensures counter stores
+		 * are ordered before the noref flag and offset are set.
+		 */
+		lib_ring_buffer_set_noref_offset(config, &buf->backend, idx,
+						 buf_trunc_val(offset, chan));
+
+		/*
+		 * Order set_noref and record counter updates before the
+		 * end of subbuffer exclusive access. Orders with
+		 * respect to writers coming into the subbuffer after
+		 * wrap around, and also with respect to concurrent readers.
+		 */
+		smp_mb();
+		/* End of exclusive subbuffer access */
+		v_set(config, &buf->commit_cold[idx].cc_sb,
+		      commit_count);
+		/*
+		 * Order later updates to the reserve count after
+		 * the commit_cold cc_sb update.
+		 */
+		smp_wmb();
+		lib_ring_buffer_vmcore_check_deliver(config, buf,
+						     commit_count, idx);
+
+		/*
+		 * RING_BUFFER_WAKEUP_BY_WRITER wakeup is not lock-free.
+		 */
+		if (config->wakeup == RING_BUFFER_WAKEUP_BY_WRITER
+		    && atomic_long_read(&buf->active_readers)
+		    && lib_ring_buffer_poll_deliver(config, buf, chan)) {
+			wake_up_interruptible(&buf->read_wait);
+			wake_up_interruptible(&chan->read_wait);
+		}
+
+	}
+}
+EXPORT_SYMBOL_GPL(lib_ring_buffer_check_deliver_slow);
+
 int __init init_lib_ring_buffer_frontend(void)
 {
 	int cpu;
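lib_ring_buffer_check_deliver_slow() above implements the two-step cc_sb hand-off described in its comment. As a rough mental model only (not the patch's code; the struct and function names below are made up, and kernel-specific details such as the smp_wmb()/smp_mb() pairing, vmcore consistency, and reader wakeup are omitted), the claim-then-release pattern looks like this in C11 atomics:

#include <stdatomic.h>
#include <stdbool.h>

/* Hypothetical stand-in for the per-sub-buffer cold commit counter. */
struct subbuf_state {
	_Atomic unsigned long cc_sb;
};

/*
 * old_count is the value a fully committed sub-buffer is expected to hold;
 * new_count = old_count + subbuf_size. Because subbuf_size >= 2,
 * old_count + 1 can never collide with new_count, so the intermediate
 * value unambiguously marks "delivery in progress".
 */
static bool try_deliver(struct subbuf_state *s, unsigned long old_count,
			unsigned long new_count)
{
	unsigned long expected = old_count;

	/* Step 1: claim the sub-buffer; only one writer can win this CAS. */
	if (!atomic_compare_exchange_strong(&s->cc_sb, &expected,
					    old_count + 1))
		return false;	/* someone else is (or was) delivering it */

	/*
	 * Exclusive access: update per-sub-buffer counters, write the
	 * end-of-sub-buffer header, set the noref flag... (omitted here).
	 */

	/*
	 * Step 2: publish the final count; readers and wrapping writers
	 * may now use the sub-buffer.
	 */
	atomic_store_explicit(&s->cc_sb, new_count, memory_order_release);
	return true;
}

int main(void)
{
	struct subbuf_state s = { .cc_sb = 4096 };

	/* First claimant wins and publishes 8192; a second attempt loses. */
	return try_deliver(&s, 4096, 8192) && !try_deliver(&s, 4096, 8192)
		? 0 : 1;
}

Writers that wrap around and observe neither old_count nor new_count simply drop their records, which is what gives the delivering writer its lock-free exclusivity over the sub-buffer control structures.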