/*
 * buffers.h
 * LTTng userspace tracer buffering system
 *
 * Copyright (C) 2009 - Pierre-Marc Fournier (pierre-marc dot fournier at polymtl dot ca)
 * Copyright (C) 2008 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef _UST_BUFFERS_H
#define _UST_BUFFERS_H

#include <kcompat/kref.h>
#include <assert.h>
#include "channels.h"
#include "tracerconst.h"
#include "tracercore.h"
#include "header-inline.h"
#include <usterr.h>

/***** FIXME: SHOULD BE REMOVED *****/

/*
 * BUFFER_TRUNC zeroes the subbuffer offset and the subbuffer number parts of
 * the offset, which leaves only the buffer number.
 */
#define BUFFER_TRUNC(offset, chan) \
	((offset) & (~((chan)->alloc_size-1)))
#define BUFFER_OFFSET(offset, chan) ((offset) & ((chan)->alloc_size - 1))
#define SUBBUF_OFFSET(offset, chan) ((offset) & ((chan)->subbuf_size - 1))
#define SUBBUF_ALIGN(offset, chan) \
	(((offset) + (chan)->subbuf_size) & (~((chan)->subbuf_size - 1)))
#define SUBBUF_TRUNC(offset, chan) \
	((offset) & (~((chan)->subbuf_size - 1)))
#define SUBBUF_INDEX(offset, chan) \
	(BUFFER_OFFSET((offset), chan) >> (chan)->subbuf_size_order)
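
/*
 * Illustrative worked example (hypothetical channel geometry, for clarity
 * only): with subbuf_size = 4096 (subbuf_size_order = 12) and
 * alloc_size = 16384, an offset of 9300 decomposes as
 *
 *	SUBBUF_OFFSET(9300, chan) == 1108	(offset within the sub-buffer)
 *	SUBBUF_INDEX(9300, chan)  == 2		(third sub-buffer)
 *	SUBBUF_TRUNC(9300, chan)  == 8192	(start of that sub-buffer)
 *	SUBBUF_ALIGN(9300, chan)  == 12288	(start of the next sub-buffer)
 */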

/*
 * Tracks changes to rchan/rchan_buf structs
 */
#define UST_CHANNEL_VERSION 8

/**************************************/

struct commit_counters {
	long cc;			/* ATOMIC */
	long cc_sb;			/* ATOMIC - Incremented _once_ at sb switch */
};

struct ust_buffer {
	/* First 32 bytes cache-hot cacheline */
	long offset;			/* Current offset in the buffer *atomic* */
	struct commit_counters *commit_count;	/* Commit count per sub-buffer */
	long consumed;			/* Current consumed offset in the buffer, *atomic* access (shared) */
	unsigned long last_tsc;		/*
					 * Last timestamp written in the buffer.
					 */
	/* End of first 32 bytes cacheline */
	long active_readers;		/* ATOMIC - Active readers count, standard atomic access (shared) */
	long events_lost;		/* ATOMIC */
	long corrupted_subbuffers;	/* *ATOMIC* */
	/* One byte is written to this pipe when data is available, in order
	   to wake the consumer */
	/* portability: single-byte writes must be as quick as possible. The kernel-side
	   buffer must be large enough so the writer doesn't block. From the pipe(7)
	   man page: since Linux 2.6.11, the pipe capacity is 65536 bytes. */
	int data_ready_fd_write;
	/* the reading end of the pipe */
	int data_ready_fd_read;

	unsigned int finalized;
//ust//	struct timer_list switch_timer;	/* timer for periodic switch */
	unsigned long switch_timer_interval;	/* 0 = unset */

	struct ust_channel *chan;

	struct kref kref;
	void *buf_data;
	size_t buf_size;
	int shmid;
	unsigned int cpu;

	/* commit count per subbuffer; must be at end of struct */
	long commit_seq[0] ____cacheline_aligned;	/* ATOMIC */
} ____cacheline_aligned;
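
/*
 * Illustrative allocation sketch (hypothetical, for clarity only): because
 * commit_seq[] is a zero-length array placed at the end of the struct, the
 * buffer setup code has to allocate one counter per sub-buffer along with
 * the struct itself, roughly:
 *
 *	struct ust_buffer *buf;
 *	size_t size = sizeof(*buf)
 *		+ chan->subbuf_cnt * sizeof(buf->commit_seq[0]);
 *
 *	buf = malloc(size);
 *	memset(buf, 0, size);
 *
 * The real setup code lives in the buffer management code (buffers.c); the
 * data area itself (buf_data) sits in a shared memory segment, which is
 * what the shmid field above refers to.
 */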

/*
 * A sub-buffer switch is done either during tracing (FORCE_ACTIVE) or as a
 * final flush after tracing stops (FORCE_FLUSH), in which case nothing more
 * is written into the new sub-buffer.
 */
enum force_switch_mode { FORCE_ACTIVE, FORCE_FLUSH };

extern int ltt_reserve_slot_lockless_slow(struct ust_trace *trace,
		struct ust_channel *ltt_channel, void **transport_data,
		size_t data_size, size_t *slot_size, long *buf_offset, u64 *tsc,
		unsigned int *rflags, int largest_align, int cpu);

extern void ltt_force_switch_lockless_slow(struct ust_buffer *buf,
		enum force_switch_mode mode);


static __inline__ void ust_buffers_do_copy(void *dest, const void *src, size_t len)
{
	union {
		const void *src;
		const u8 *src8;
		const u16 *src16;
		const u32 *src32;
		const u64 *src64;
	} u = { .src = src };

	switch (len) {
	case 0:	break;
	case 1:	*(u8 *)dest = *u.src8;
		break;
	case 2:	*(u16 *)dest = *u.src16;
		break;
	case 4:	*(u32 *)dest = *u.src32;
		break;
	case 8:	*(u64 *)dest = *u.src64;
		break;
	default:
		memcpy(dest, src, len);
	}
}
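
/*
 * Note (illustrative, hypothetical call): when len is a compile-time
 * constant, the switch above lets the copy collapse to a single store, e.g.
 *
 *	u32 v = 42;
 *	ust_buffers_do_copy(dest, &v, sizeof(v));	(one 32-bit store)
 *
 * Any other length falls back to memcpy().
 */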

static __inline__ void *ust_buffers_offset_address(struct ust_buffer *buf, size_t offset)
{
	return ((char *)buf->buf_data) + offset;
}

/*
 * Last TSC comparison functions. Check if the current TSC overflows
 * LTT_TSC_BITS bits from the last TSC read. Reads and writes last_tsc
 * atomically.
 */

/* FIXME: does this test work properly? */
#if (BITS_PER_LONG == 32)
static __inline__ void save_last_tsc(struct ust_buffer *ltt_buf,
		u64 tsc)
{
	ltt_buf->last_tsc = (unsigned long)(tsc >> LTT_TSC_BITS);
}

static __inline__ int last_tsc_overflow(struct ust_buffer *ltt_buf,
		u64 tsc)
{
	unsigned long tsc_shifted = (unsigned long)(tsc >> LTT_TSC_BITS);

	if (unlikely(tsc_shifted - ltt_buf->last_tsc))
		return 1;
	else
		return 0;
}
#else
static __inline__ void save_last_tsc(struct ust_buffer *ltt_buf,
		u64 tsc)
{
	ltt_buf->last_tsc = (unsigned long)tsc;
}

static __inline__ int last_tsc_overflow(struct ust_buffer *ltt_buf,
		u64 tsc)
{
	if (unlikely((tsc - ltt_buf->last_tsc) >> LTT_TSC_BITS))
		return 1;
	else
		return 0;
}
#endif
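
/*
 * Illustrative sketch (the LTT_TSC_BITS value is assumed for the example
 * only): if LTT_TSC_BITS were 27, two events whose TSCs differ by 2^27
 * cycles or more would make last_tsc_overflow() return 1, and the reserve
 * path below would then set LTT_RFLAG_ID_SIZE_TSC so the event header
 * carries a full TSC rather than the truncated one.
 */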

static __inline__ void ltt_reserve_push_reader(
		struct ust_channel *rchan,
		struct ust_buffer *buf,
		long offset)
{
	long consumed_old, consumed_new;

	do {
		consumed_old = uatomic_read(&buf->consumed);
		/*
		 * If buffer is in overwrite mode, push the reader consumed
		 * count if the write position has reached it and we are not
		 * at the first iteration (don't push the reader farther than
		 * the writer). This operation can be done concurrently by many
		 * writers in the same buffer; the writer at the farthest
		 * write position sub-buffer index in the buffer is the one
		 * which will win this loop.
		 * If the buffer is not in overwrite mode, pushing the reader
		 * only happens if a sub-buffer is corrupted.
		 */
		if (unlikely((SUBBUF_TRUNC(offset, buf->chan)
				- SUBBUF_TRUNC(consumed_old, buf->chan))
				>= rchan->alloc_size))
			consumed_new = SUBBUF_ALIGN(consumed_old, buf->chan);
		else
			return;
	} while (unlikely(uatomic_cmpxchg(&buf->consumed, consumed_old,
			consumed_new) != consumed_old));
}

static __inline__ void ltt_vmcore_check_deliver(
		struct ust_buffer *buf,
		long commit_count, long idx)
{
	uatomic_set(&buf->commit_seq[idx], commit_count);
}

static __inline__ void ltt_check_deliver(struct ust_channel *chan,
		struct ust_buffer *buf,
		long offset, long commit_count, long idx)
{
	long old_commit_count = commit_count - chan->subbuf_size;

	/* Check if all commits have been done */
	if (unlikely((BUFFER_TRUNC(offset, chan)
			>> chan->n_subbufs_order)
			- (old_commit_count
				& chan->commit_count_mask) == 0)) {
		/*
		 * If we succeeded in updating the cc_sb, we are delivering
		 * the subbuffer. Deals with concurrent updates of the "cc"
		 * value without adding an add_return atomic operation to the
		 * fast path.
		 */
		if (likely(uatomic_cmpxchg(&buf->commit_count[idx].cc_sb,
				old_commit_count, commit_count)
					== old_commit_count)) {
			int result;

			/*
			 * Set noref flag for this subbuffer.
			 */
//ust//			ltt_set_noref_flag(rchan, buf, idx);
			ltt_vmcore_check_deliver(buf, commit_count, idx);

			/* wakeup consumer */
			result = write(buf->data_ready_fd_write, "1", 1);
			if (result == -1) {
				PERROR("write (in ltt_relay_buffer_flush)");
				ERR("this should never happen!");
			}
		}
	}
}

static __inline__ int ltt_poll_deliver(struct ust_channel *chan, struct ust_buffer *buf)
{
	long consumed_old, consumed_idx, commit_count, write_offset;

	consumed_old = uatomic_read(&buf->consumed);
	consumed_idx = SUBBUF_INDEX(consumed_old, buf->chan);
	commit_count = uatomic_read(&buf->commit_count[consumed_idx].cc_sb);
	/*
	 * No memory barrier here, since we are only interested
	 * in a statistically correct polling result. The next poll will
	 * get the data if we are racing. The mb() that ensures correct
	 * memory order is in get_subbuf.
	 */
	write_offset = uatomic_read(&buf->offset);

	/*
	 * Check that the subbuffer we are trying to consume has already
	 * been fully committed.
	 */
	if (((commit_count - chan->subbuf_size)
			& chan->commit_count_mask)
			- (BUFFER_TRUNC(consumed_old, buf->chan)
				>> chan->n_subbufs_order)
			!= 0)
		return 0;

	/*
	 * Check that we are not about to read the same subbuffer in
	 * which the writer head is.
	 */
	if ((SUBBUF_TRUNC(write_offset, buf->chan)
			- SUBBUF_TRUNC(consumed_old, buf->chan))
			== 0)
		return 0;

	return 1;
}

/*
 * Returns 0 if the reserve succeeded, or 1 if the slow path must be taken.
 */
static __inline__ int ltt_relay_try_reserve(
		struct ust_channel *chan,
		struct ust_buffer *buf,
		size_t data_size,
		u64 *tsc, unsigned int *rflags, int largest_align,
		long *o_begin, long *o_end, long *o_old,
		size_t *before_hdr_pad, size_t *size)
{
	*o_begin = uatomic_read(&buf->offset);
	*o_old = *o_begin;

	*tsc = trace_clock_read64();

//ust// #ifdef CONFIG_LTT_VMCORE
//ust//	prefetch(&buf->commit_count[SUBBUF_INDEX(*o_begin, rchan)]);
//ust//	prefetch(&buf->commit_seq[SUBBUF_INDEX(*o_begin, rchan)]);
//ust// #else
//ust//	prefetchw(&buf->commit_count[SUBBUF_INDEX(*o_begin, rchan)]);
//ust// #endif
	if (last_tsc_overflow(buf, *tsc))
		*rflags = LTT_RFLAG_ID_SIZE_TSC;

	if (unlikely(SUBBUF_OFFSET(*o_begin, buf->chan) == 0))
		return 1;

	*size = ust_get_header_size(chan,
			*o_begin, data_size,
			before_hdr_pad, *rflags);
	*size += ltt_align(*o_begin + *size, largest_align) + data_size;
	if (unlikely((SUBBUF_OFFSET(*o_begin, buf->chan) + *size)
			> buf->chan->subbuf_size))
		return 1;

	/*
	 * Event fits in the current buffer and we are not on a switch
	 * boundary. It's safe to write.
	 */
	*o_end = *o_begin + *size;

	if (unlikely((SUBBUF_OFFSET(*o_end, buf->chan)) == 0))
		/*
		 * The offset_end will fall at the very beginning of the next
		 * subbuffer.
		 */
		return 1;

	return 0;
}

static __inline__ int ltt_reserve_slot(struct ust_trace *trace,
		struct ust_channel *chan, void **transport_data,
		size_t data_size, size_t *slot_size, long *buf_offset, u64 *tsc,
		unsigned int *rflags, int largest_align, int cpu)
{
	struct ust_buffer *buf = chan->buf[cpu];
	long o_begin, o_end, o_old;
	size_t before_hdr_pad;

	/*
	 * Perform retryable operations.
	 */
	/* FIXME: make this really per-cpu? */
	if (unlikely(LOAD_SHARED(ltt_nesting) > 4)) {
		DBG("Dropping event because nesting is too deep.");
		uatomic_inc(&buf->events_lost);
		return -EPERM;
	}

	if (unlikely(ltt_relay_try_reserve(chan, buf,
			data_size, tsc, rflags,
			largest_align, &o_begin, &o_end, &o_old,
			&before_hdr_pad, slot_size)))
		goto slow_path;

	if (unlikely(uatomic_cmpxchg(&buf->offset, o_old, o_end) != o_old))
		goto slow_path;

	/*
	 * Atomically update last_tsc. This update races against concurrent
	 * atomic updates, but the race will always cause supplementary full TSC
	 * events, never the opposite (missing a full TSC event when it would be
	 * needed).
	 */
	save_last_tsc(buf, *tsc);

	/*
	 * Push the reader if necessary
	 */
	ltt_reserve_push_reader(chan, buf, o_end - 1);

	/*
	 * Clear noref flag for this subbuffer.
	 */
//ust//	ltt_clear_noref_flag(chan, buf, SUBBUF_INDEX(o_end - 1, chan));

	*buf_offset = o_begin + before_hdr_pad;
	return 0;
slow_path:
	return ltt_reserve_slot_lockless_slow(trace, chan,
			transport_data, data_size, slot_size, buf_offset, tsc,
			rflags, largest_align, cpu);
}

/*
 * Force a sub-buffer switch for a per-cpu buffer. This operation is
 * completely reentrant: it can be called while tracing is active with
 * absolutely no lock held.
 */
static __inline__ void ltt_force_switch(struct ust_buffer *buf,
		enum force_switch_mode mode)
{
	ltt_force_switch_lockless_slow(buf, mode);
}

/*
 * For flight recording. Must be called after relay_commit.
 * This function increments the sub-buffer's commit_seq counter each time the
 * commit count reaches back the reserve offset (modulo subbuffer size). It is
 * useful for crash dump.
 */
//ust// #ifdef CONFIG_LTT_VMCORE
static __inline__ void ltt_write_commit_counter(struct ust_channel *chan,
		struct ust_buffer *buf, long idx, long buf_offset,
		long commit_count, size_t data_size)
{
	long offset;
	long commit_seq_old;

	offset = buf_offset + data_size;

	/*
	 * SUBBUF_OFFSET includes commit_count_mask. We can simply
	 * compare the offsets within the subbuffer without caring about
	 * buffer full/empty mismatch because offset is never zero here
	 * (subbuffer header and event headers have non-zero length).
	 */
	if (unlikely(SUBBUF_OFFSET(offset - commit_count, buf->chan)))
		return;

	commit_seq_old = uatomic_read(&buf->commit_seq[idx]);
	while (commit_seq_old < commit_count)
		commit_seq_old = uatomic_cmpxchg(&buf->commit_seq[idx],
				commit_seq_old, commit_count);

	DBG("commit_seq for channel %s_%d, subbuf %ld is now %ld", buf->chan->channel_name, buf->cpu, idx, commit_count);
}
//ust// #else
//ust// static __inline__ void ltt_write_commit_counter(struct ust_buffer *buf,
//ust//		long idx, long buf_offset, long commit_count, size_t data_size)
//ust// {
//ust// }
//ust// #endif

/*
 * Atomic unordered slot commit. Increments the commit count in the
 * specified sub-buffer, and delivers it if necessary.
 *
 * Parameters:
 *
 * @chan : channel structure
 * @buf : buffer structure
 * @buf_offset : offset following the event header.
 * @data_size : size of the event data.
 * @slot_size : size of the reserved slot.
 */
static __inline__ void ltt_commit_slot(
		struct ust_channel *chan,
		struct ust_buffer *buf, long buf_offset,
		size_t data_size, size_t slot_size)
{
	long offset_end = buf_offset;
	long endidx = SUBBUF_INDEX(offset_end - 1, chan);
	long commit_count;

#ifdef LTT_NO_IPI_BARRIER
	smp_wmb();
#else
	/*
	 * Must write slot data before incrementing commit count.
	 * This compiler barrier is upgraded into a smp_mb() by the IPI
	 * sent by get_subbuf().
	 */
	barrier();
#endif
	uatomic_add(&buf->commit_count[endidx].cc, slot_size);
	/*
	 * The commit count read can race with concurrent OOO commit count
	 * updates. This is only needed for ltt_check_deliver (for non-polling
	 * delivery only) and for ltt_write_commit_counter. The race can only
	 * cause the counter to be read with the same value more than once,
	 * which could cause:
	 * - Multiple delivery for the same sub-buffer (which is handled
	 *   gracefully by the reader code) if the value is for a full
	 *   sub-buffer. It's important that we can never miss a sub-buffer
	 *   delivery. Re-reading the value after the uatomic_add ensures this.
	 * - Reading a commit_count with a higher value than what was actually
	 *   added to it for the ltt_write_commit_counter call (again caused by
	 *   a concurrent committer). It does not matter, because this function
	 *   is interested in the fact that the commit count reaches back the
	 *   reserve offset for a specific sub-buffer, which is completely
	 *   independent of the order.
	 */
	commit_count = uatomic_read(&buf->commit_count[endidx].cc);

	ltt_check_deliver(chan, buf, offset_end - 1, commit_count, endidx);
	/*
	 * Update data_size for each commit. It's needed only for extracting
	 * ltt buffers from vmcore, after crash.
	 */
	ltt_write_commit_counter(chan, buf, endidx, buf_offset, commit_count, data_size);
}
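
/*
 * Illustrative usage sketch (hypothetical caller; event header serialization
 * and error handling omitted): a probe pairs the reserve and commit fast
 * paths roughly as follows, with ust_buffers_write() (below) copying the
 * payload into the reserved slot:
 *
 *	size_t slot_size;
 *	long buf_offset;
 *	u64 tsc;
 *	unsigned int rflags = 0;
 *	void *transport_data;
 *
 *	if (ltt_reserve_slot(trace, chan, &transport_data, data_size,
 *			&slot_size, &buf_offset, &tsc, &rflags,
 *			largest_align, cpu) == 0) {
 *		... write the event header at buf_offset and advance
 *		    buf_offset past it, then:
 *		ust_buffers_write(chan->buf[cpu], buf_offset, data, data_size);
 *		ltt_commit_slot(chan, chan->buf[cpu], buf_offset, data_size,
 *				slot_size);
 *	}
 *
 * The actual serialization code in the tracer is what writes the header and
 * handles alignment; this sketch only shows how the inlines above fit
 * together.
 */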

void _ust_buffers_write(struct ust_buffer *buf, size_t offset,
		const void *src, size_t len, ssize_t cpy);

static __inline__ int ust_buffers_write(struct ust_buffer *buf, size_t offset,
		const void *src, size_t len)
{
	size_t cpy;
	size_t buf_offset = BUFFER_OFFSET(offset, buf->chan);

	assert(buf_offset < buf->chan->subbuf_size * buf->chan->subbuf_cnt);

	cpy = min_t(size_t, len, buf->buf_size - buf_offset);
	ust_buffers_do_copy(buf->buf_data + buf_offset, src, cpy);

	if (unlikely(len != cpy))
		_ust_buffers_write(buf, buf_offset, src, len, cpy);
	return len;
}

extern int ust_buffers_get_subbuf(struct ust_buffer *buf, long *consumed);
extern int ust_buffers_put_subbuf(struct ust_buffer *buf, unsigned long uconsumed_old);

extern void init_ustrelay_transport(void);

#endif /* _UST_BUFFERS_H */