X-Git-Url: http://git.lttng.org/?a=blobdiff_plain;f=libringbuffer%2Fring_buffer_frontend.c;h=76d6ec746c40ade48905bcfbf0cd3c81274d812e;hb=e56bb47c3af00db3bc9e2dea711bef4882e6ef4c;hp=c7394b1fd668d6e307e251221fcb1d79df07fb9e;hpb=34daae3e48423fc35b363e19136e324f9e31b05a;p=lttng-ust.git diff --git a/libringbuffer/ring_buffer_frontend.c b/libringbuffer/ring_buffer_frontend.c index c7394b1f..76d6ec74 100644 --- a/libringbuffer/ring_buffer_frontend.c +++ b/libringbuffer/ring_buffer_frontend.c @@ -52,6 +52,7 @@ */ #define _GNU_SOURCE +#define _LGPL_SOURCE #include #include #include @@ -59,6 +60,7 @@ #include #include #include +#include #include #include #include @@ -71,13 +73,9 @@ #include "backend.h" #include "frontend.h" #include "shm.h" -#include "tlsfixup.h" +#include "rb-init.h" #include "../liblttng-ust/compat.h" /* For ENODATA */ -#ifndef max -#define max(a, b) ((a) > (b) ? (a) : (b)) -#endif - /* Print DBG() messages about events lost only every 1048576 hits */ #define DBG_PRINT_NR_LOST (1UL << 20) @@ -87,6 +85,7 @@ #define CLOCKID CLOCK_MONOTONIC #define LTTNG_UST_RING_BUFFER_GET_RETRY 10 #define LTTNG_UST_RING_BUFFER_RETRY_DELAY_MS 10 +#define RETRY_DELAY_MS 100 /* 100 ms. */ /* * Non-static to ensure the compiler does not optimize away the xor. @@ -152,6 +151,14 @@ static struct timer_signal_data timer_signal = { .lock = PTHREAD_MUTEX_INITIALIZER, }; +int lttng_ust_blocking_retry_timeout = + CONFIG_LTTNG_UST_DEFAULT_BLOCKING_RETRY_TIMEOUT_MS; + +void lttng_ust_ringbuffer_set_retry_timeout(int timeout) +{ + lttng_ust_blocking_retry_timeout = timeout; +} + /** * lib_ring_buffer_reset - Reset ring buffer to initial values. * @buf: Ring buffer. @@ -170,7 +177,7 @@ void lib_ring_buffer_reset(struct lttng_ust_lib_ring_buffer *buf, chan = shmp(handle, buf->backend.chan); if (!chan) - abort(); + return; config = &chan->backend.config; /* * Reset iterator first. It will put the subbuffer if it currently holds @@ -178,9 +185,18 @@ void lib_ring_buffer_reset(struct lttng_ust_lib_ring_buffer *buf, */ v_set(config, &buf->offset, 0); for (i = 0; i < chan->backend.num_subbuf; i++) { - v_set(config, &shmp_index(handle, buf->commit_hot, i)->cc, 0); - v_set(config, &shmp_index(handle, buf->commit_hot, i)->seq, 0); - v_set(config, &shmp_index(handle, buf->commit_cold, i)->cc_sb, 0); + struct commit_counters_hot *cc_hot; + struct commit_counters_cold *cc_cold; + + cc_hot = shmp_index(handle, buf->commit_hot, i); + if (!cc_hot) + return; + cc_cold = shmp_index(handle, buf->commit_cold, i); + if (!cc_cold) + return; + v_set(config, &cc_hot->cc, 0); + v_set(config, &cc_hot->seq, 0); + v_set(config, &cc_cold->cc_sb, 0); } uatomic_set(&buf->consumed, 0); uatomic_set(&buf->record_disabled, 0); @@ -316,6 +332,9 @@ int lib_ring_buffer_create(struct lttng_ust_lib_ring_buffer *buf, { const struct lttng_ust_lib_ring_buffer_config *config = &chanb->config; struct channel *chan = caa_container_of(chanb, struct channel, backend); + struct lttng_ust_lib_ring_buffer_backend_subbuffer *wsb; + struct channel *shmp_chan; + struct commit_counters_hot *cc_hot; void *priv = channel_get_private(chan); size_t subbuf_header_size; uint64_t tsc; @@ -354,11 +373,26 @@ int lib_ring_buffer_create(struct lttng_ust_lib_ring_buffer *buf, */ subbuf_header_size = config->cb.subbuffer_header_size(); v_set(config, &buf->offset, subbuf_header_size); - subbuffer_id_clear_noref(config, &shmp_index(handle, buf->backend.buf_wsb, 0)->id); - tsc = config->cb.ring_buffer_clock_read(shmp(handle, buf->backend.chan)); + wsb = shmp_index(handle, buf->backend.buf_wsb, 0); + if (!wsb) { + ret = -EPERM; + goto free_chanbuf; + } + subbuffer_id_clear_noref(config, &wsb->id); + shmp_chan = shmp(handle, buf->backend.chan); + if (!shmp_chan) { + ret = -EPERM; + goto free_chanbuf; + } + tsc = config->cb.ring_buffer_clock_read(shmp_chan); config->cb.buffer_begin(buf, tsc, 0, handle); - v_add(config, subbuf_header_size, &shmp_index(handle, buf->commit_hot, 0)->cc); - v_add(config, subbuf_header_size, &shmp_index(handle, buf->commit_hot, 0)->seq); + cc_hot = shmp_index(handle, buf->commit_hot, 0); + if (!cc_hot) { + ret = -EPERM; + goto free_chanbuf; + } + v_add(config, subbuf_header_size, &cc_hot->cc); + v_add(config, subbuf_header_size, &cc_hot->seq); if (config->cb.buffer_create) { ret = config->cb.buffer_create(buf, priv, cpu, chanb->name, handle); @@ -406,7 +440,7 @@ void lib_ring_buffer_channel_switch_timer(int sig, siginfo_t *si, void *uc) shmp(handle, chan->backend.buf[cpu].shmp); if (!buf) - abort(); + goto end; if (uatomic_read(&buf->active_readers)) lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE, chan->handle); @@ -416,15 +450,127 @@ void lib_ring_buffer_channel_switch_timer(int sig, siginfo_t *si, void *uc) shmp(handle, chan->backend.buf[0].shmp); if (!buf) - abort(); + goto end; if (uatomic_read(&buf->active_readers)) lib_ring_buffer_switch_slow(buf, SWITCH_ACTIVE, chan->handle); } +end: pthread_mutex_unlock(&wakeup_fd_mutex); return; } +static +int lib_ring_buffer_poll_deliver(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer *buf, + struct channel *chan, + struct lttng_ust_shm_handle *handle) +{ + unsigned long consumed_old, consumed_idx, commit_count, write_offset; + struct commit_counters_cold *cc_cold; + + consumed_old = uatomic_read(&buf->consumed); + consumed_idx = subbuf_index(consumed_old, chan); + cc_cold = shmp_index(handle, buf->commit_cold, consumed_idx); + if (!cc_cold) + return 0; + commit_count = v_read(config, &cc_cold->cc_sb); + /* + * No memory barrier here, since we are only interested + * in a statistically correct polling result. The next poll will + * get the data is we are racing. The mb() that ensures correct + * memory order is in get_subbuf. + */ + write_offset = v_read(config, &buf->offset); + + /* + * Check that the subbuffer we are trying to consume has been + * already fully committed. + */ + + if (((commit_count - chan->backend.subbuf_size) + & chan->commit_count_mask) + - (buf_trunc(consumed_old, chan) + >> chan->backend.num_subbuf_order) + != 0) + return 0; + + /* + * Check that we are not about to read the same subbuffer in + * which the writer head is. + */ + if (subbuf_trunc(write_offset, chan) - subbuf_trunc(consumed_old, chan) + == 0) + return 0; + + return 1; +} + +static +void lib_ring_buffer_wakeup(struct lttng_ust_lib_ring_buffer *buf, + struct lttng_ust_shm_handle *handle) +{ + int wakeup_fd = shm_get_wakeup_fd(handle, &buf->self._ref); + sigset_t sigpipe_set, pending_set, old_set; + int ret, sigpipe_was_pending = 0; + + if (wakeup_fd < 0) + return; + + /* + * Wake-up the other end by writing a null byte in the pipe + * (non-blocking). Important note: Because writing into the + * pipe is non-blocking (and therefore we allow dropping wakeup + * data, as long as there is wakeup data present in the pipe + * buffer to wake up the consumer), the consumer should perform + * the following sequence for waiting: + * 1) empty the pipe (reads). + * 2) check if there is data in the buffer. + * 3) wait on the pipe (poll). + * + * Discard the SIGPIPE from write(), not disturbing any SIGPIPE + * that might be already pending. If a bogus SIGPIPE is sent to + * the entire process concurrently by a malicious user, it may + * be simply discarded. + */ + ret = sigemptyset(&pending_set); + assert(!ret); + /* + * sigpending returns the mask of signals that are _both_ + * blocked for the thread _and_ pending for either the thread or + * the entire process. + */ + ret = sigpending(&pending_set); + assert(!ret); + sigpipe_was_pending = sigismember(&pending_set, SIGPIPE); + /* + * If sigpipe was pending, it means it was already blocked, so + * no need to block it. + */ + if (!sigpipe_was_pending) { + ret = sigemptyset(&sigpipe_set); + assert(!ret); + ret = sigaddset(&sigpipe_set, SIGPIPE); + assert(!ret); + ret = pthread_sigmask(SIG_BLOCK, &sigpipe_set, &old_set); + assert(!ret); + } + do { + ret = write(wakeup_fd, "", 1); + } while (ret == -1L && errno == EINTR); + if (ret == -1L && errno == EPIPE && !sigpipe_was_pending) { + struct timespec timeout = { 0, 0 }; + do { + ret = sigtimedwait(&sigpipe_set, NULL, + &timeout); + } while (ret == -1L && errno == EINTR); + } + if (!sigpipe_was_pending) { + ret = pthread_sigmask(SIG_SETMASK, &old_set, NULL); + assert(!ret); + } +} + static void lib_ring_buffer_channel_do_read(struct channel *chan) { @@ -445,7 +591,7 @@ void lib_ring_buffer_channel_do_read(struct channel *chan) shmp(handle, chan->backend.buf[cpu].shmp); if (!buf) - abort(); + goto end; if (uatomic_read(&buf->active_readers) && lib_ring_buffer_poll_deliver(config, buf, chan, handle)) { @@ -457,13 +603,14 @@ void lib_ring_buffer_channel_do_read(struct channel *chan) shmp(handle, chan->backend.buf[0].shmp); if (!buf) - abort(); + goto end; if (uatomic_read(&buf->active_readers) && lib_ring_buffer_poll_deliver(config, buf, chan, handle)) { lib_ring_buffer_wakeup(buf, handle); } } +end: pthread_mutex_unlock(&wakeup_fd_mutex); } @@ -642,7 +789,7 @@ void lib_ring_buffer_channel_switch_timer_start(struct channel *chan) } its.it_value.tv_sec = chan->switch_timer_interval / 1000000; - its.it_value.tv_nsec = chan->switch_timer_interval % 1000000; + its.it_value.tv_nsec = (chan->switch_timer_interval % 1000000) * 1000; its.it_interval.tv_sec = its.it_value.tv_sec; its.it_interval.tv_nsec = its.it_value.tv_nsec; @@ -696,7 +843,7 @@ void lib_ring_buffer_channel_read_timer_start(struct channel *chan) } its.it_value.tv_sec = chan->read_timer_interval / 1000000; - its.it_value.tv_nsec = chan->read_timer_interval % 1000000; + its.it_value.tv_nsec = (chan->read_timer_interval % 1000000) * 1000; its.it_interval.tv_sec = its.it_value.tv_sec; its.it_interval.tv_nsec = its.it_value.tv_nsec; @@ -751,22 +898,25 @@ static void channel_print_errors(struct channel *chan, for_each_possible_cpu(cpu) { struct lttng_ust_lib_ring_buffer *buf = shmp(handle, chan->backend.buf[cpu].shmp); - lib_ring_buffer_print_errors(chan, buf, cpu, handle); + if (buf) + lib_ring_buffer_print_errors(chan, buf, cpu, handle); } } else { struct lttng_ust_lib_ring_buffer *buf = shmp(handle, chan->backend.buf[0].shmp); - lib_ring_buffer_print_errors(chan, buf, -1, handle); + if (buf) + lib_ring_buffer_print_errors(chan, buf, -1, handle); } } static void channel_free(struct channel *chan, - struct lttng_ust_shm_handle *handle) + struct lttng_ust_shm_handle *handle, + int consumer) { channel_backend_free(&chan->backend, handle); /* chan is freed by shm teardown */ - shm_object_table_destroy(handle->table); + shm_object_table_destroy(handle->table, consumer); free(handle); } @@ -889,7 +1039,7 @@ struct lttng_ust_shm_handle *channel_create(const struct lttng_ust_lib_ring_buff error_backend_init: error_append: - shm_object_table_destroy(handle->table); + shm_object_table_destroy(handle->table, 1); error_table_alloc: free(handle); return NULL; @@ -921,7 +1071,7 @@ struct lttng_ust_shm_handle *channel_handle_create(void *data, return handle; error_table_object: - shm_object_table_destroy(handle->table); + shm_object_table_destroy(handle->table, 0); error_table_alloc: free(handle); return NULL; @@ -949,9 +1099,10 @@ unsigned int channel_handle_get_nr_streams(struct lttng_ust_shm_handle *handle) } static -void channel_release(struct channel *chan, struct lttng_ust_shm_handle *handle) +void channel_release(struct channel *chan, struct lttng_ust_shm_handle *handle, + int consumer) { - channel_free(chan, handle); + channel_free(chan, handle, consumer); } /** @@ -983,7 +1134,7 @@ void channel_destroy(struct channel *chan, struct lttng_ust_shm_handle *handle, * sessiond/consumer are keeping a reference on the shm file * descriptor directly. No need to refcount. */ - channel_release(chan, handle); + channel_release(chan, handle, consumer); return; } @@ -1084,6 +1235,8 @@ void lib_ring_buffer_release_read(struct lttng_ust_lib_ring_buffer *buf, { struct channel *chan = shmp(handle, buf->backend.chan); + if (!chan) + return; CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1); cmm_smp_mb(); uatomic_dec(&buf->active_readers); @@ -1103,11 +1256,15 @@ int lib_ring_buffer_snapshot(struct lttng_ust_lib_ring_buffer *buf, unsigned long *consumed, unsigned long *produced, struct lttng_ust_shm_handle *handle) { - struct channel *chan = shmp(handle, buf->backend.chan); - const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; + struct channel *chan; + const struct lttng_ust_lib_ring_buffer_config *config; unsigned long consumed_cur, write_offset; int finalized; + chan = shmp(handle, buf->backend.chan); + if (!chan) + return -EPERM; + config = &chan->backend.config; finalized = CMM_ACCESS_ONCE(buf->finalized); /* * Read finalized before counters. @@ -1148,6 +1305,43 @@ nodata: return -EAGAIN; } +/** + * Performs the same function as lib_ring_buffer_snapshot(), but the positions + * are saved regardless of whether the consumed and produced positions are + * in the same subbuffer. + * @buf: ring buffer + * @consumed: consumed byte count indicating the last position read + * @produced: produced byte count indicating the last position written + * + * This function is meant to provide information on the exact producer and + * consumer positions without regard for the "snapshot" feature. + */ +int lib_ring_buffer_snapshot_sample_positions( + struct lttng_ust_lib_ring_buffer *buf, + unsigned long *consumed, unsigned long *produced, + struct lttng_ust_shm_handle *handle) +{ + struct channel *chan; + const struct lttng_ust_lib_ring_buffer_config *config; + + chan = shmp(handle, buf->backend.chan); + if (!chan) + return -EPERM; + config = &chan->backend.config; + cmm_smp_rmb(); + *consumed = uatomic_read(&buf->consumed); + /* + * No need to issue a memory barrier between consumed count read and + * write offset read, because consumed count can only change + * concurrently in overwrite mode, and we keep a sequence counter + * identifier derived from the write offset to check we are getting + * the same sub-buffer we are expecting (the sub-buffers are atomically + * "tagged" upon writes, tags are checked upon read). + */ + *produced = v_read(config, &buf->offset); + return 0; +} + /** * lib_ring_buffer_move_consumer - move consumed counter forward * @buf: ring buffer @@ -1158,9 +1352,12 @@ void lib_ring_buffer_move_consumer(struct lttng_ust_lib_ring_buffer *buf, struct lttng_ust_shm_handle *handle) { struct lttng_ust_lib_ring_buffer_backend *bufb = &buf->backend; - struct channel *chan = shmp(handle, bufb->chan); + struct channel *chan; unsigned long consumed; + chan = shmp(handle, bufb->chan); + if (!chan) + return; CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1); /* @@ -1186,11 +1383,16 @@ int lib_ring_buffer_get_subbuf(struct lttng_ust_lib_ring_buffer *buf, unsigned long consumed, struct lttng_ust_shm_handle *handle) { - struct channel *chan = shmp(handle, buf->backend.chan); - const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; + struct channel *chan; + const struct lttng_ust_lib_ring_buffer_config *config; unsigned long consumed_cur, consumed_idx, commit_count, write_offset; int ret, finalized, nr_retry = LTTNG_UST_RING_BUFFER_GET_RETRY; + struct commit_counters_cold *cc_cold; + chan = shmp(handle, buf->backend.chan); + if (!chan) + return -EPERM; + config = &chan->backend.config; retry: finalized = CMM_ACCESS_ONCE(buf->finalized); /* @@ -1199,7 +1401,10 @@ retry: cmm_smp_rmb(); consumed_cur = uatomic_read(&buf->consumed); consumed_idx = subbuf_index(consumed, chan); - commit_count = v_read(config, &shmp_index(handle, buf->commit_cold, consumed_idx)->cc_sb); + cc_cold = shmp_index(handle, buf->commit_cold, consumed_idx); + if (!cc_cold) + return -EPERM; + commit_count = v_read(config, &cc_cold->cc_sb); /* * Make sure we read the commit count before reading the buffer * data and the write offset. Correct consumed offset ordering @@ -1342,10 +1547,16 @@ void lib_ring_buffer_put_subbuf(struct lttng_ust_lib_ring_buffer *buf, struct lttng_ust_shm_handle *handle) { struct lttng_ust_lib_ring_buffer_backend *bufb = &buf->backend; - struct channel *chan = shmp(handle, bufb->chan); - const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; - unsigned long read_sb_bindex, consumed_idx, consumed; + struct channel *chan; + const struct lttng_ust_lib_ring_buffer_config *config; + unsigned long sb_bindex, consumed_idx, consumed; + struct lttng_ust_lib_ring_buffer_backend_pages_shmp *rpages; + struct lttng_ust_lib_ring_buffer_backend_pages *backend_pages; + chan = shmp(handle, bufb->chan); + if (!chan) + return; + config = &chan->backend.config; CHAN_WARN_ON(chan, uatomic_read(&buf->active_readers) != 1); if (!buf->get_subbuf) { @@ -1365,11 +1576,16 @@ void lib_ring_buffer_put_subbuf(struct lttng_ust_lib_ring_buffer *buf, * Can be below zero if an iterator is used on a snapshot more than * once. */ - read_sb_bindex = subbuffer_id_get_index(config, bufb->buf_rsb.id); - v_add(config, v_read(config, - &shmp(handle, shmp_index(handle, bufb->array, read_sb_bindex)->shmp)->records_unread), - &bufb->records_read); - v_set(config, &shmp(handle, shmp_index(handle, bufb->array, read_sb_bindex)->shmp)->records_unread, 0); + sb_bindex = subbuffer_id_get_index(config, bufb->buf_rsb.id); + rpages = shmp_index(handle, bufb->array, sb_bindex); + if (!rpages) + return; + backend_pages = shmp(handle, rpages->shmp); + if (!backend_pages) + return; + v_add(config, v_read(config, &backend_pages->records_unread), + &bufb->records_read); + v_set(config, &backend_pages->records_unread, 0); CHAN_WARN_ON(chan, config->mode == RING_BUFFER_OVERWRITE && subbuffer_id_is_noref(config, bufb->buf_rsb.id)); subbuffer_id_set_noref(config, &bufb->buf_rsb.id); @@ -1405,10 +1621,18 @@ void lib_ring_buffer_print_subbuffer_errors(struct lttng_ust_lib_ring_buffer *bu { const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; unsigned long cons_idx, commit_count, commit_count_sb; + struct commit_counters_hot *cc_hot; + struct commit_counters_cold *cc_cold; cons_idx = subbuf_index(cons_offset, chan); - commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, cons_idx)->cc); - commit_count_sb = v_read(config, &shmp_index(handle, buf->commit_cold, cons_idx)->cc_sb); + cc_hot = shmp_index(handle, buf->commit_hot, cons_idx); + if (!cc_hot) + return; + cc_cold = shmp_index(handle, buf->commit_cold, cons_idx); + if (!cc_cold) + return; + commit_count = v_read(config, &cc_hot->cc); + commit_count_sb = v_read(config, &cc_cold->cc_sb); if (subbuf_offset(commit_count, chan) != 0) DBG("ring buffer %s, cpu %d: " @@ -1492,7 +1716,8 @@ void lib_ring_buffer_print_errors(struct channel *chan, /* * lib_ring_buffer_switch_old_start: Populate old subbuffer header. * - * Only executed when the buffer is finalized, in SWITCH_FLUSH. + * Only executed by SWITCH_FLUSH, which can be issued while tracing is + * active or at buffer finalization (destroy). */ static void lib_ring_buffer_switch_old_start(struct lttng_ust_lib_ring_buffer *buf, @@ -1504,6 +1729,7 @@ void lib_ring_buffer_switch_old_start(struct lttng_ust_lib_ring_buffer *buf, const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; unsigned long oldidx = subbuf_index(offsets->old, chan); unsigned long commit_count; + struct commit_counters_hot *cc_hot; config->cb.buffer_begin(buf, tsc, oldidx, handle); @@ -1512,15 +1738,18 @@ void lib_ring_buffer_switch_old_start(struct lttng_ust_lib_ring_buffer *buf, * determine that the subbuffer is full. */ cmm_smp_wmb(); + cc_hot = shmp_index(handle, buf->commit_hot, oldidx); + if (!cc_hot) + return; v_add(config, config->cb.subbuffer_header_size(), - &shmp_index(handle, buf->commit_hot, oldidx)->cc); - commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, oldidx)->cc); + &cc_hot->cc); + commit_count = v_read(config, &cc_hot->cc); /* Check if the written buffer has to be delivered */ lib_ring_buffer_check_deliver(config, buf, chan, offsets->old, commit_count, oldidx, handle, tsc); - lib_ring_buffer_write_commit_counter(config, buf, chan, oldidx, + lib_ring_buffer_write_commit_counter(config, buf, chan, offsets->old + config->cb.subbuffer_header_size(), - commit_count, handle); + commit_count, handle, cc_hot); } /* @@ -1541,6 +1770,7 @@ void lib_ring_buffer_switch_old_end(struct lttng_ust_lib_ring_buffer *buf, const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; unsigned long oldidx = subbuf_index(offsets->old - 1, chan); unsigned long commit_count, padding_size, data_size; + struct commit_counters_hot *cc_hot; data_size = subbuf_offset(offsets->old - 1, chan) + 1; padding_size = chan->backend.subbuf_size - data_size; @@ -1552,12 +1782,16 @@ void lib_ring_buffer_switch_old_end(struct lttng_ust_lib_ring_buffer *buf, * determine that the subbuffer is full. */ cmm_smp_wmb(); - v_add(config, padding_size, &shmp_index(handle, buf->commit_hot, oldidx)->cc); - commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, oldidx)->cc); + cc_hot = shmp_index(handle, buf->commit_hot, oldidx); + if (!cc_hot) + return; + v_add(config, padding_size, &cc_hot->cc); + commit_count = v_read(config, &cc_hot->cc); lib_ring_buffer_check_deliver(config, buf, chan, offsets->old - 1, commit_count, oldidx, handle, tsc); - lib_ring_buffer_write_commit_counter(config, buf, chan, oldidx, - offsets->old + padding_size, commit_count, handle); + lib_ring_buffer_write_commit_counter(config, buf, chan, + offsets->old + padding_size, commit_count, handle, + cc_hot); } /* @@ -1577,6 +1811,7 @@ void lib_ring_buffer_switch_new_start(struct lttng_ust_lib_ring_buffer *buf, const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; unsigned long beginidx = subbuf_index(offsets->begin, chan); unsigned long commit_count; + struct commit_counters_hot *cc_hot; config->cb.buffer_begin(buf, tsc, beginidx, handle); @@ -1585,15 +1820,17 @@ void lib_ring_buffer_switch_new_start(struct lttng_ust_lib_ring_buffer *buf, * determine that the subbuffer is full. */ cmm_smp_wmb(); - v_add(config, config->cb.subbuffer_header_size(), - &shmp_index(handle, buf->commit_hot, beginidx)->cc); - commit_count = v_read(config, &shmp_index(handle, buf->commit_hot, beginidx)->cc); + cc_hot = shmp_index(handle, buf->commit_hot, beginidx); + if (!cc_hot) + return; + v_add(config, config->cb.subbuffer_header_size(), &cc_hot->cc); + commit_count = v_read(config, &cc_hot->cc); /* Check if the written buffer has to be delivered */ lib_ring_buffer_check_deliver(config, buf, chan, offsets->begin, commit_count, beginidx, handle, tsc); - lib_ring_buffer_write_commit_counter(config, buf, chan, beginidx, + lib_ring_buffer_write_commit_counter(config, buf, chan, offsets->begin + config->cb.subbuffer_header_size(), - commit_count, handle); + commit_count, handle, cc_hot); } /* @@ -1664,23 +1901,27 @@ int lib_ring_buffer_try_switch_slow(enum switch_mode mode, if (caa_unlikely(off == 0)) { unsigned long sb_index, commit_count; + struct commit_counters_cold *cc_cold; /* - * We are performing a SWITCH_FLUSH. At this stage, there are no - * concurrent writes into the buffer. + * We are performing a SWITCH_FLUSH. There may be concurrent + * writes into the buffer if e.g. invoked while performing a + * snapshot on an active trace. * - * The client does not save any header information. Don't - * switch empty subbuffer on finalize, because it is invalid to - * deliver a completely empty subbuffer. + * If the client does not save any header information + * (sub-buffer header size == 0), don't switch empty subbuffer + * on finalize, because it is invalid to deliver a completely + * empty subbuffer. */ if (!config->cb.subbuffer_header_size()) return -1; /* Test new buffer integrity */ sb_index = subbuf_index(offsets->begin, chan); - commit_count = v_read(config, - &shmp_index(handle, buf->commit_cold, - sb_index)->cc_sb); + cc_cold = shmp_index(handle, buf->commit_cold, sb_index); + if (!cc_cold) + return -1; + commit_count = v_read(config, &cc_cold->cc_sb); reserve_commit_diff = (buf_trunc(offsets->begin, chan) >> chan->backend.num_subbuf_order) @@ -1738,12 +1979,17 @@ int lib_ring_buffer_try_switch_slow(enum switch_mode mode, void lib_ring_buffer_switch_slow(struct lttng_ust_lib_ring_buffer *buf, enum switch_mode mode, struct lttng_ust_shm_handle *handle) { - struct channel *chan = shmp(handle, buf->backend.chan); - const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; + struct channel *chan; + const struct lttng_ust_lib_ring_buffer_config *config; struct switch_offsets offsets; unsigned long oldidx; uint64_t tsc; + chan = shmp(handle, buf->backend.chan); + if (!chan) + return; + config = &chan->backend.config; + offsets.size = 0; /* @@ -1786,6 +2032,23 @@ void lib_ring_buffer_switch_slow(struct lttng_ust_lib_ring_buffer *buf, enum swi lib_ring_buffer_switch_old_end(buf, chan, &offsets, tsc, handle); } +static +bool handle_blocking_retry(int *timeout_left_ms) +{ + int timeout = *timeout_left_ms, delay; + + if (caa_likely(!timeout)) + return false; /* Do not retry, discard event. */ + if (timeout < 0) /* Wait forever. */ + delay = RETRY_DELAY_MS; + else + delay = min_t(int, timeout, RETRY_DELAY_MS); + (void) poll(NULL, 0, delay); + if (timeout > 0) + *timeout_left_ms -= delay; + return true; /* Retry. */ +} + /* * Returns : * 0 if ok @@ -1797,11 +2060,13 @@ static int lib_ring_buffer_try_reserve_slow(struct lttng_ust_lib_ring_buffer *buf, struct channel *chan, struct switch_offsets *offsets, - struct lttng_ust_lib_ring_buffer_ctx *ctx) + struct lttng_ust_lib_ring_buffer_ctx *ctx, + void *client_ctx) { const struct lttng_ust_lib_ring_buffer_config *config = &chan->backend.config; struct lttng_ust_shm_handle *handle = ctx->handle; unsigned long reserve_commit_diff, offset_cmp; + int timeout_left_ms = lttng_ust_blocking_retry_timeout; retry: offsets->begin = offset_cmp = v_read(config, &buf->offset); @@ -1824,7 +2089,7 @@ retry: offsets->size = config->cb.record_header_size(config, chan, offsets->begin, &offsets->pre_header_padding, - ctx); + ctx, client_ctx); offsets->size += lib_ring_buffer_align(offsets->begin + offsets->size, ctx->largest_align) @@ -1837,6 +2102,7 @@ retry: } if (caa_unlikely(offsets->switch_new_start)) { unsigned long sb_index, commit_count; + struct commit_counters_cold *cc_cold; /* * We are typically not filling the previous buffer completely. @@ -1855,9 +2121,10 @@ retry: * are not seen reordered when updated by another CPU. */ cmm_smp_rmb(); - commit_count = v_read(config, - &shmp_index(handle, buf->commit_cold, - sb_index)->cc_sb); + cc_cold = shmp_index(handle, buf->commit_cold, sb_index); + if (!cc_cold) + return -1; + commit_count = v_read(config, &cc_cold->cc_sb); /* Read buf->commit_cold[sb_index].cc_sb before buf->offset. */ cmm_smp_rmb(); if (caa_unlikely(offset_cmp != v_read(config, &buf->offset))) { @@ -1882,6 +2149,9 @@ retry: >= chan->backend.buf_size)) { unsigned long nr_lost; + if (handle_blocking_retry(&timeout_left_ms)) + goto retry; + /* * We do not overwrite non consumed buffers * and we are full : record is lost. @@ -1925,7 +2195,7 @@ retry: config->cb.record_header_size(config, chan, offsets->begin, &offsets->pre_header_padding, - ctx); + ctx, client_ctx); offsets->size += lib_ring_buffer_align(offsets->begin + offsets->size, ctx->largest_align) @@ -1979,7 +2249,8 @@ retry: * -EIO for other errors, else returns 0. * It will take care of sub-buffer switching. */ -int lib_ring_buffer_reserve_slow(struct lttng_ust_lib_ring_buffer_ctx *ctx) +int lib_ring_buffer_reserve_slow(struct lttng_ust_lib_ring_buffer_ctx *ctx, + void *client_ctx) { struct channel *chan = ctx->chan; struct lttng_ust_shm_handle *handle = ctx->handle; @@ -1992,13 +2263,15 @@ int lib_ring_buffer_reserve_slow(struct lttng_ust_lib_ring_buffer_ctx *ctx) buf = shmp(handle, chan->backend.buf[ctx->cpu].shmp); else buf = shmp(handle, chan->backend.buf[0].shmp); + if (!buf) + return -EIO; ctx->buf = buf; offsets.size = 0; do { ret = lib_ring_buffer_try_reserve_slow(buf, chan, &offsets, - ctx); + ctx, client_ctx); if (caa_unlikely(ret)) return ret; } while (caa_unlikely(v_cmpxchg(config, &buf->offset, offsets.old, @@ -2050,6 +2323,158 @@ int lib_ring_buffer_reserve_slow(struct lttng_ust_lib_ring_buffer_ctx *ctx) return 0; } +static +void lib_ring_buffer_vmcore_check_deliver(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer *buf, + unsigned long commit_count, + unsigned long idx, + struct lttng_ust_shm_handle *handle) +{ + struct commit_counters_hot *cc_hot; + + if (config->oops != RING_BUFFER_OOPS_CONSISTENCY) + return; + cc_hot = shmp_index(handle, buf->commit_hot, idx); + if (!cc_hot) + return; + v_set(config, &cc_hot->seq, commit_count); +} + +/* + * The ring buffer can count events recorded and overwritten per buffer, + * but it is disabled by default due to its performance overhead. + */ +#ifdef LTTNG_RING_BUFFER_COUNT_EVENTS +static +void deliver_count_events(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer *buf, + unsigned long idx, + struct lttng_ust_shm_handle *handle) +{ + v_add(config, subbuffer_get_records_count(config, + &buf->backend, idx, handle), + &buf->records_count); + v_add(config, subbuffer_count_records_overrun(config, + &buf->backend, idx, handle), + &buf->records_overrun); +} +#else /* LTTNG_RING_BUFFER_COUNT_EVENTS */ +static +void deliver_count_events(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer *buf, + unsigned long idx, + struct lttng_ust_shm_handle *handle) +{ +} +#endif /* #else LTTNG_RING_BUFFER_COUNT_EVENTS */ + +void lib_ring_buffer_check_deliver_slow(const struct lttng_ust_lib_ring_buffer_config *config, + struct lttng_ust_lib_ring_buffer *buf, + struct channel *chan, + unsigned long offset, + unsigned long commit_count, + unsigned long idx, + struct lttng_ust_shm_handle *handle, + uint64_t tsc) +{ + unsigned long old_commit_count = commit_count + - chan->backend.subbuf_size; + struct commit_counters_cold *cc_cold; + + /* + * If we succeeded at updating cc_sb below, we are the subbuffer + * writer delivering the subbuffer. Deals with concurrent + * updates of the "cc" value without adding a add_return atomic + * operation to the fast path. + * + * We are doing the delivery in two steps: + * - First, we cmpxchg() cc_sb to the new value + * old_commit_count + 1. This ensures that we are the only + * subbuffer user successfully filling the subbuffer, but we + * do _not_ set the cc_sb value to "commit_count" yet. + * Therefore, other writers that would wrap around the ring + * buffer and try to start writing to our subbuffer would + * have to drop records, because it would appear as + * non-filled. + * We therefore have exclusive access to the subbuffer control + * structures. This mutual exclusion with other writers is + * crucially important to perform record overruns count in + * flight recorder mode locklessly. + * - When we are ready to release the subbuffer (either for + * reading or for overrun by other writers), we simply set the + * cc_sb value to "commit_count" and perform delivery. + * + * The subbuffer size is least 2 bytes (minimum size: 1 page). + * This guarantees that old_commit_count + 1 != commit_count. + */ + + /* + * Order prior updates to reserve count prior to the + * commit_cold cc_sb update. + */ + cmm_smp_wmb(); + cc_cold = shmp_index(handle, buf->commit_cold, idx); + if (!cc_cold) + return; + if (caa_likely(v_cmpxchg(config, &cc_cold->cc_sb, + old_commit_count, old_commit_count + 1) + == old_commit_count)) { + /* + * Start of exclusive subbuffer access. We are + * guaranteed to be the last writer in this subbuffer + * and any other writer trying to access this subbuffer + * in this state is required to drop records. + */ + deliver_count_events(config, buf, idx, handle); + config->cb.buffer_end(buf, tsc, idx, + lib_ring_buffer_get_data_size(config, + buf, + idx, + handle), + handle); + + /* + * Increment the packet counter while we have exclusive + * access. + */ + subbuffer_inc_packet_count(config, &buf->backend, idx, handle); + + /* + * Set noref flag and offset for this subbuffer id. + * Contains a memory barrier that ensures counter stores + * are ordered before set noref and offset. + */ + lib_ring_buffer_set_noref_offset(config, &buf->backend, idx, + buf_trunc_val(offset, chan), handle); + + /* + * Order set_noref and record counter updates before the + * end of subbuffer exclusive access. Orders with + * respect to writers coming into the subbuffer after + * wrap around, and also order wrt concurrent readers. + */ + cmm_smp_mb(); + /* End of exclusive subbuffer access */ + v_set(config, &cc_cold->cc_sb, commit_count); + /* + * Order later updates to reserve count after + * the commit cold cc_sb update. + */ + cmm_smp_wmb(); + lib_ring_buffer_vmcore_check_deliver(config, buf, + commit_count, idx, handle); + + /* + * RING_BUFFER_WAKEUP_BY_WRITER wakeup is not lock-free. + */ + if (config->wakeup == RING_BUFFER_WAKEUP_BY_WRITER + && uatomic_read(&buf->active_readers) + && lib_ring_buffer_poll_deliver(config, buf, chan, handle)) { + lib_ring_buffer_wakeup(buf, handle); + } + } +} + /* * Force a read (imply TLS fixup for dlopen) of TLS variables. */