Fix: ensure userspace accesses are done with _inatomic
[lttng-modules.git] / lib / ringbuffer / backend_internal.h
CommitLineData
886d51a3
MD
1#ifndef _LIB_RING_BUFFER_BACKEND_INTERNAL_H
2#define _LIB_RING_BUFFER_BACKEND_INTERNAL_H
f3bc08c5
MD
3
/*
 * lib/ringbuffer/backend_internal.h
 *
 * Ring buffer backend (internal helpers).
 *
 * Copyright (C) 2008-2012 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; only
 * version 2.1 of the License.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
25
26#include "../../wrapper/ringbuffer/config.h"
494a81f5 27#include "../../wrapper/ringbuffer/backend_types.h"
f3bc08c5
MD
28#include "../../wrapper/ringbuffer/frontend_types.h"
29#include <linux/string.h>
4ea00e4f 30#include <linux/uaccess.h>
f3bc08c5
MD
31
/* Ring buffer backend API presented to the frontend */

/* Ring buffer and channel backend create/free */

int lib_ring_buffer_backend_create(struct lib_ring_buffer_backend *bufb,
				   struct channel_backend *chan, int cpu);
void channel_backend_unregister_notifiers(struct channel_backend *chanb);
void lib_ring_buffer_backend_free(struct lib_ring_buffer_backend *bufb);
int channel_backend_init(struct channel_backend *chanb,
			 const char *name,
			 const struct lib_ring_buffer_config *config,
			 void *priv, size_t subbuf_size,
			 size_t num_subbuf);
void channel_backend_free(struct channel_backend *chanb);

/* Ring buffer and channel backend reset */

void lib_ring_buffer_backend_reset(struct lib_ring_buffer_backend *bufb);
void channel_backend_reset(struct channel_backend *chanb);

/* Backend subsystem init/exit */

int lib_ring_buffer_backend_init(void);
void lib_ring_buffer_backend_exit(void);

/*
 * Out-of-line helpers; all three take the destination buffer backend, a
 * byte offset, a length and a "pagecpy" count.
 * NOTE(review): presumably these are the slow paths used when an access
 * crosses a page boundary, with pagecpy being the number of bytes already
 * handled by the inline fast path -- confirm against the frontend callers.
 */
extern void _lib_ring_buffer_write(struct lib_ring_buffer_backend *bufb,
				   size_t offset, const void *src, size_t len,
				   ssize_t pagecpy);
extern void _lib_ring_buffer_memset(struct lib_ring_buffer_backend *bufb,
				    size_t offset, int c, size_t len,
				    ssize_t pagecpy);
/*
 * NOTE(review): src is handed to a *_from_user_* helper but is not annotated
 * __user here; check the definition before adding the annotation so both
 * stay in sync.
 */
extern void _lib_ring_buffer_copy_from_user_inatomic(struct lib_ring_buffer_backend *bufb,
				    size_t offset, const void *src,
				    size_t len, ssize_t pagecpy);
f3bc08c5
MD
62
/*
 * Subbuffer ID bits for overwrite mode. Need to fit within a single word to be
 * exchanged atomically.
 *
 * Top half word, except lowest bit, belongs to "offset", which is used to
 * count the produced buffers.  For overwrite mode, this provides the
 * consumer with the capacity to read subbuffers in order, handling the
 * situation where producers would write up to 2^15 buffers (or 2^31 for 64-bit
 * systems) concurrently with a single execution of get_subbuf (between offset
 * sampling and subbuffer ID exchange).
 */

#define HALF_ULONG_BITS		(BITS_PER_LONG >> 1)

#define SB_ID_OFFSET_SHIFT	(HALF_ULONG_BITS + 1)
#define SB_ID_OFFSET_COUNT	(1UL << SB_ID_OFFSET_SHIFT)
#define SB_ID_OFFSET_MASK	(~(SB_ID_OFFSET_COUNT - 1))
/*
 * Lowest bit of top word half belongs to noref. Used only for overwrite mode.
 */
#define SB_ID_NOREF_SHIFT	(SB_ID_OFFSET_SHIFT - 1)
#define SB_ID_NOREF_COUNT	(1UL << SB_ID_NOREF_SHIFT)
#define SB_ID_NOREF_MASK	SB_ID_NOREF_COUNT
/*
 * In overwrite mode: lowest half of word is used for index.
 * Limit of 2^16 subbuffers per buffer on 32-bit, 2^32 on 64-bit.
 * In producer-consumer mode: whole word used for index.
 */
#define SB_ID_INDEX_SHIFT	0
#define SB_ID_INDEX_COUNT	(1UL << SB_ID_INDEX_SHIFT)
#define SB_ID_INDEX_MASK	(SB_ID_NOREF_COUNT - 1)
94
95/*
96 * Construct the subbuffer id from offset, index and noref. Use only the index
97 * for producer-consumer mode (offset and noref are only used in overwrite
98 * mode).
99 */
100static inline
101unsigned long subbuffer_id(const struct lib_ring_buffer_config *config,
102 unsigned long offset, unsigned long noref,
103 unsigned long index)
104{
105 if (config->mode == RING_BUFFER_OVERWRITE)
106 return (offset << SB_ID_OFFSET_SHIFT)
107 | (noref << SB_ID_NOREF_SHIFT)
108 | index;
109 else
110 return index;
111}
112
113/*
114 * Compare offset with the offset contained within id. Return 1 if the offset
115 * bits are identical, else 0.
116 */
117static inline
118int subbuffer_id_compare_offset(const struct lib_ring_buffer_config *config,
119 unsigned long id, unsigned long offset)
120{
121 return (id & SB_ID_OFFSET_MASK) == (offset << SB_ID_OFFSET_SHIFT);
122}
123
124static inline
125unsigned long subbuffer_id_get_index(const struct lib_ring_buffer_config *config,
126 unsigned long id)
127{
128 if (config->mode == RING_BUFFER_OVERWRITE)
129 return id & SB_ID_INDEX_MASK;
130 else
131 return id;
132}
133
134static inline
135unsigned long subbuffer_id_is_noref(const struct lib_ring_buffer_config *config,
136 unsigned long id)
137{
138 if (config->mode == RING_BUFFER_OVERWRITE)
139 return !!(id & SB_ID_NOREF_MASK);
140 else
141 return 1;
142}
143
144/*
145 * Only used by reader on subbuffer ID it has exclusive access to. No volatile
146 * needed.
147 */
148static inline
149void subbuffer_id_set_noref(const struct lib_ring_buffer_config *config,
150 unsigned long *id)
151{
152 if (config->mode == RING_BUFFER_OVERWRITE)
153 *id |= SB_ID_NOREF_MASK;
154}
155
156static inline
157void subbuffer_id_set_noref_offset(const struct lib_ring_buffer_config *config,
158 unsigned long *id, unsigned long offset)
159{
160 unsigned long tmp;
161
162 if (config->mode == RING_BUFFER_OVERWRITE) {
163 tmp = *id;
164 tmp &= ~SB_ID_OFFSET_MASK;
165 tmp |= offset << SB_ID_OFFSET_SHIFT;
166 tmp |= SB_ID_NOREF_MASK;
167 /* Volatile store, read concurrently by readers. */
168 ACCESS_ONCE(*id) = tmp;
169 }
170}
171
172/* No volatile access, since already used locally */
173static inline
174void subbuffer_id_clear_noref(const struct lib_ring_buffer_config *config,
175 unsigned long *id)
176{
177 if (config->mode == RING_BUFFER_OVERWRITE)
178 *id &= ~SB_ID_NOREF_MASK;
179}
180
181/*
182 * For overwrite mode, cap the number of subbuffers per buffer to:
183 * 2^16 on 32-bit architectures
184 * 2^32 on 64-bit architectures
185 * This is required to fit in the index part of the ID. Return 0 on success,
186 * -EPERM on failure.
187 */
188static inline
189int subbuffer_id_check_index(const struct lib_ring_buffer_config *config,
190 unsigned long num_subbuf)
191{
192 if (config->mode == RING_BUFFER_OVERWRITE)
193 return (num_subbuf > (1UL << HALF_ULONG_BITS)) ? -EPERM : 0;
194 else
195 return 0;
196}
197
198static inline
199void subbuffer_count_record(const struct lib_ring_buffer_config *config,
200 struct lib_ring_buffer_backend *bufb,
201 unsigned long idx)
202{
203 unsigned long sb_bindex;
204
205 sb_bindex = subbuffer_id_get_index(config, bufb->buf_wsb[idx].id);
206 v_inc(config, &bufb->array[sb_bindex]->records_commit);
207}
208
209/*
210 * Reader has exclusive subbuffer access for record consumption. No need to
211 * perform the decrement atomically.
212 */
213static inline
214void subbuffer_consume_record(const struct lib_ring_buffer_config *config,
215 struct lib_ring_buffer_backend *bufb)
216{
217 unsigned long sb_bindex;
218
219 sb_bindex = subbuffer_id_get_index(config, bufb->buf_rsb.id);
220 CHAN_WARN_ON(bufb->chan,
221 !v_read(config, &bufb->array[sb_bindex]->records_unread));
222 /* Non-atomic decrement protected by exclusive subbuffer access */
223 _v_dec(config, &bufb->array[sb_bindex]->records_unread);
224 v_inc(config, &bufb->records_read);
225}
226
227static inline
228unsigned long subbuffer_get_records_count(
229 const struct lib_ring_buffer_config *config,
230 struct lib_ring_buffer_backend *bufb,
231 unsigned long idx)
232{
233 unsigned long sb_bindex;
234
235 sb_bindex = subbuffer_id_get_index(config, bufb->buf_wsb[idx].id);
236 return v_read(config, &bufb->array[sb_bindex]->records_commit);
237}
238
239/*
240 * Must be executed at subbuffer delivery when the writer has _exclusive_
241 * subbuffer access. See ring_buffer_check_deliver() for details.
242 * ring_buffer_get_records_count() must be called to get the records count
243 * before this function, because it resets the records_commit count.
244 */
245static inline
246unsigned long subbuffer_count_records_overrun(
247 const struct lib_ring_buffer_config *config,
248 struct lib_ring_buffer_backend *bufb,
249 unsigned long idx)
250{
251 struct lib_ring_buffer_backend_pages *pages;
252 unsigned long overruns, sb_bindex;
253
254 sb_bindex = subbuffer_id_get_index(config, bufb->buf_wsb[idx].id);
255 pages = bufb->array[sb_bindex];
256 overruns = v_read(config, &pages->records_unread);
257 v_set(config, &pages->records_unread,
258 v_read(config, &pages->records_commit));
259 v_set(config, &pages->records_commit, 0);
260
261 return overruns;
262}
263
264static inline
265void subbuffer_set_data_size(const struct lib_ring_buffer_config *config,
266 struct lib_ring_buffer_backend *bufb,
267 unsigned long idx,
268 unsigned long data_size)
269{
270 struct lib_ring_buffer_backend_pages *pages;
271 unsigned long sb_bindex;
272
273 sb_bindex = subbuffer_id_get_index(config, bufb->buf_wsb[idx].id);
274 pages = bufb->array[sb_bindex];
275 pages->data_size = data_size;
276}
277
278static inline
279unsigned long subbuffer_get_read_data_size(
280 const struct lib_ring_buffer_config *config,
281 struct lib_ring_buffer_backend *bufb)
282{
283 struct lib_ring_buffer_backend_pages *pages;
284 unsigned long sb_bindex;
285
286 sb_bindex = subbuffer_id_get_index(config, bufb->buf_rsb.id);
287 pages = bufb->array[sb_bindex];
288 return pages->data_size;
289}
290
291static inline
292unsigned long subbuffer_get_data_size(
293 const struct lib_ring_buffer_config *config,
294 struct lib_ring_buffer_backend *bufb,
295 unsigned long idx)
296{
297 struct lib_ring_buffer_backend_pages *pages;
298 unsigned long sb_bindex;
299
300 sb_bindex = subbuffer_id_get_index(config, bufb->buf_wsb[idx].id);
301 pages = bufb->array[sb_bindex];
302 return pages->data_size;
303}
304
305/**
306 * lib_ring_buffer_clear_noref - Clear the noref subbuffer flag, called by
307 * writer.
308 */
309static inline
310void lib_ring_buffer_clear_noref(const struct lib_ring_buffer_config *config,
311 struct lib_ring_buffer_backend *bufb,
312 unsigned long idx)
313{
314 unsigned long id, new_id;
315
316 if (config->mode != RING_BUFFER_OVERWRITE)
317 return;
318
319 /*
320 * Performing a volatile access to read the sb_pages, because we want to
321 * read a coherent version of the pointer and the associated noref flag.
322 */
323 id = ACCESS_ONCE(bufb->buf_wsb[idx].id);
324 for (;;) {
325 /* This check is called on the fast path for each record. */
326 if (likely(!subbuffer_id_is_noref(config, id))) {
327 /*
328 * Store after load dependency ordering the writes to
329 * the subbuffer after load and test of the noref flag
330 * matches the memory barrier implied by the cmpxchg()
331 * in update_read_sb_index().
332 */
333 return; /* Already writing to this buffer */
334 }
335 new_id = id;
336 subbuffer_id_clear_noref(config, &new_id);
337 new_id = cmpxchg(&bufb->buf_wsb[idx].id, id, new_id);
338 if (likely(new_id == id))
339 break;
340 id = new_id;
341 }
342}
343
344/**
345 * lib_ring_buffer_set_noref_offset - Set the noref subbuffer flag and offset,
346 * called by writer.
347 */
348static inline
349void lib_ring_buffer_set_noref_offset(const struct lib_ring_buffer_config *config,
350 struct lib_ring_buffer_backend *bufb,
351 unsigned long idx, unsigned long offset)
352{
353 if (config->mode != RING_BUFFER_OVERWRITE)
354 return;
355
356 /*
357 * Because ring_buffer_set_noref() is only called by a single thread
358 * (the one which updated the cc_sb value), there are no concurrent
359 * updates to take care of: other writers have not updated cc_sb, so
360 * they cannot set the noref flag, and concurrent readers cannot modify
361 * the pointer because the noref flag is not set yet.
362 * The smp_wmb() in ring_buffer_commit() takes care of ordering writes
363 * to the subbuffer before this set noref operation.
364 * subbuffer_set_noref() uses a volatile store to deal with concurrent
365 * readers of the noref flag.
366 */
367 CHAN_WARN_ON(bufb->chan,
368 subbuffer_id_is_noref(config, bufb->buf_wsb[idx].id));
369 /*
370 * Memory barrier that ensures counter stores are ordered before set
371 * noref and offset.
372 */
373 smp_mb();
374 subbuffer_id_set_noref_offset(config, &bufb->buf_wsb[idx].id, offset);
375}
376
377/**
378 * update_read_sb_index - Read-side subbuffer index update.
379 */
380static inline
381int update_read_sb_index(const struct lib_ring_buffer_config *config,
382 struct lib_ring_buffer_backend *bufb,
383 struct channel_backend *chanb,
384 unsigned long consumed_idx,
385 unsigned long consumed_count)
386{
387 unsigned long old_id, new_id;
388
389 if (config->mode == RING_BUFFER_OVERWRITE) {
390 /*
391 * Exchange the target writer subbuffer with our own unused
392 * subbuffer. No need to use ACCESS_ONCE() here to read the
393 * old_wpage, because the value read will be confirmed by the
394 * following cmpxchg().
395 */
396 old_id = bufb->buf_wsb[consumed_idx].id;
397 if (unlikely(!subbuffer_id_is_noref(config, old_id)))
398 return -EAGAIN;
399 /*
400 * Make sure the offset count we are expecting matches the one
401 * indicated by the writer.
402 */
403 if (unlikely(!subbuffer_id_compare_offset(config, old_id,
404 consumed_count)))
405 return -EAGAIN;
406 CHAN_WARN_ON(bufb->chan,
407 !subbuffer_id_is_noref(config, bufb->buf_rsb.id));
408 subbuffer_id_set_noref_offset(config, &bufb->buf_rsb.id,
409 consumed_count);
410 new_id = cmpxchg(&bufb->buf_wsb[consumed_idx].id, old_id,
411 bufb->buf_rsb.id);
412 if (unlikely(old_id != new_id))
413 return -EAGAIN;
414 bufb->buf_rsb.id = new_id;
415 } else {
416 /* No page exchange, use the writer page directly */
417 bufb->buf_rsb.id = bufb->buf_wsb[consumed_idx].id;
418 }
419 return 0;
420}
421
/*
 * Copy len bytes from src to dest.
 *
 * Use the architecture-specific memcpy implementation for constant-sized
 * inputs, but rely on an inline memcpy for length statically unknown.
 * The function call to memcpy is just way too expensive for a fast path.
 *
 * len is evaluated once into a local; the config argument is accepted but
 * not used by the expansion.
 */
#define lib_ring_buffer_do_copy(config, dest, src, len)		\
do {								\
	size_t __len = (len);					\
	if (__builtin_constant_p(len))				\
		memcpy(dest, src, __len);			\
	else							\
		inline_memcpy(dest, src, __len);		\
} while (0)
435
4ea00e4f 436/*
7b8ea3a5 437 * We use __copy_from_user_inatomic to copy userspace data since we already
4ea00e4f
JD
438 * did the access_ok for the whole range.
439 */
440static inline
7b8ea3a5 441unsigned long lib_ring_buffer_do_copy_from_user_inatomic(void *dest,
4ea00e4f
JD
442 const void __user *src,
443 unsigned long len)
444{
7b8ea3a5 445 return __copy_from_user_inatomic(dest, src, len);
4ea00e4f
JD
446}
447
/*
 * Write len bytes of value c to dest, one byte at a time. c is converted to
 * char on each store. A len of 0 writes nothing.
 */
static inline
void lib_ring_buffer_do_memset(char *dest, int c,
			       unsigned long len)
{
	unsigned long i;

	for (i = 0; i < len; i++)
		dest[i] = c;
}
460
886d51a3 461#endif /* _LIB_RING_BUFFER_BACKEND_INTERNAL_H */
This page took 0.041326 seconds and 4 git commands to generate.