/*
 * buffers.h
 * LTTng userspace tracer buffering system
 *
 * Copyright (C) 2009 - Pierre-Marc Fournier (pierre-marc dot fournier at polymtl dot ca)
 * Copyright (C) 2008 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef _UST_BUFFERS_H
#define _UST_BUFFERS_H

#include <assert.h>
#include <ust/kernelcompat.h>
#include "usterr.h"
#include "channels.h"
#include "tracerconst.h"
#include "tracercore.h"
#include "header-inline.h"
/***** FIXME: SHOULD BE REMOVED ***** */

/*
 * BUFFER_TRUNC zeroes the subbuffer offset and the subbuffer number parts of
 * the offset, which leaves only the buffer number.
 */
#define BUFFER_TRUNC(offset, chan) \
        ((offset) & (~((chan)->alloc_size - 1)))
#define BUFFER_OFFSET(offset, chan) ((offset) & ((chan)->alloc_size - 1))
#define SUBBUF_OFFSET(offset, chan) ((offset) & ((chan)->subbuf_size - 1))
#define SUBBUF_ALIGN(offset, chan) \
        (((offset) + (chan)->subbuf_size) & (~((chan)->subbuf_size - 1)))
#define SUBBUF_TRUNC(offset, chan) \
        ((offset) & (~((chan)->subbuf_size - 1)))
#define SUBBUF_INDEX(offset, chan) \
        (BUFFER_OFFSET((offset), chan) >> (chan)->subbuf_size_order)
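
/*
 * Worked example (illustration only, not from the original source): with
 * subbuf_size = 4096 (subbuf_size_order = 12), 4 sub-buffers and therefore
 * alloc_size = 16384, an offset of 9300 decomposes as follows:
 *
 *   SUBBUF_OFFSET(9300, chan) = 9300 & 4095         = 1108  (position inside the sub-buffer)
 *   SUBBUF_INDEX(9300, chan)  = 9300 >> 12          = 2     (third sub-buffer)
 *   SUBBUF_TRUNC(9300, chan)  = 9300 & ~4095        = 8192  (start of that sub-buffer)
 *   SUBBUF_ALIGN(9300, chan)  = (9300+4096) & ~4095 = 12288 (start of the next sub-buffer)
 *   BUFFER_TRUNC(9300, chan)  = 9300 & ~16383       = 0     (buffer-number part only)
 */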

/*
 * Tracks changes to rchan/rchan_buf structs
 */
#define UST_CHANNEL_VERSION 8

/**************************************/

struct commit_counters {
        long cc;                        /* ATOMIC */
        long cc_sb;                     /* ATOMIC - Incremented _once_ at sb switch */
};

struct ust_buffer {
        /* First 32 bytes cache-hot cacheline */
        long offset;                    /* Current offset in the buffer *atomic* */
        struct commit_counters *commit_count;   /* Commit count per sub-buffer */
        long consumed;                  /* Current offset in the buffer *atomic* access (shared) */
        unsigned long last_tsc;         /*
                                         * Last timestamp written in the buffer.
                                         */
        /* End of first 32 bytes cacheline */
        long active_readers;            /* ATOMIC - Active readers count standard atomic access (shared) */
        long events_lost;               /* ATOMIC */
        long corrupted_subbuffers;      /* ATOMIC */
        /*
         * One byte is written to this pipe when data is available, in order
         * to wake the consumer.
         * Portability note: single-byte writes must be as quick as possible.
         * The kernel-side buffer must be large enough so the writer doesn't
         * block. From the pipe(7) man page: since Linux 2.6.11, the pipe
         * capacity is 65536 bytes.
         */
        int data_ready_fd_write;
        /* the reading end of the pipe */
        int data_ready_fd_read;

        unsigned int finalized;
//ust// struct timer_list switch_timer; /* timer for periodical switch */
        unsigned long switch_timer_interval;    /* 0 = unset */

        struct ust_channel *chan;

        struct kref kref;
        void *buf_data;
        size_t buf_size;
        int shmid;
        unsigned int cpu;

        /* commit count per subbuffer; must be at end of struct */
        long commit_seq[0] ____cacheline_aligned;       /* ATOMIC */
} ____cacheline_aligned;
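
/*
 * Consumer-side sketch (illustration only; the real consumer lives outside
 * this header and the local variables below are hypothetical). The writer
 * writes one byte to data_ready_fd_write when data becomes available, so a
 * consumer can sleep on the read end of the pipe:
 *
 *   struct pollfd pfd = { .fd = buf->data_ready_fd_read, .events = POLLIN };
 *   char dummy;
 *
 *   while (poll(&pfd, 1, -1) > 0) {
 *           read(buf->data_ready_fd_read, &dummy, 1); // drain the wakeup byte
 *           // ... consume the ready sub-buffer, e.g. via ust_buffers_get_subbuf() ...
 *   }
 */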

/*
 * A switch is done during tracing or as a final flush after tracing (so it
 * won't write in the new sub-buffer).
 * FIXME: make this message clearer
 */
enum force_switch_mode { FORCE_ACTIVE, FORCE_FLUSH };

extern int ltt_reserve_slot_lockless_slow(struct ust_channel *chan,
                struct ust_trace *trace, size_t data_size,
                int largest_align, int cpu,
                struct ust_buffer **ret_buf,
                size_t *slot_size, long *buf_offset,
                u64 *tsc, unsigned int *rflags);

extern void ltt_force_switch_lockless_slow(struct ust_buffer *buf,
                enum force_switch_mode mode);


static __inline__ void ust_buffers_do_copy(void *dest, const void *src, size_t len)
{
        union {
                const void *src;
                const u8 *src8;
                const u16 *src16;
                const u32 *src32;
                const u64 *src64;
        } u = { .src = src };

        switch (len) {
        case 0: break;
        case 1: *(u8 *)dest = *u.src8;
                break;
        case 2: *(u16 *)dest = *u.src16;
                break;
        case 4: *(u32 *)dest = *u.src32;
                break;
        case 8: *(u64 *)dest = *u.src64;
                break;
        default:
                memcpy(dest, src, len);
        }
}
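
/*
 * Usage sketch (illustration only; `dest', `pid_field', `name' and
 * `name_len' are hypothetical). Fixed-size scalar fields take the
 * specialized assignment cases above, while arbitrary-length payloads fall
 * through to memcpy():
 *
 *   u32 pid_field = getpid();
 *   ust_buffers_do_copy(dest, &pid_field, sizeof(pid_field));        // 4-byte case
 *   ust_buffers_do_copy(dest + sizeof(pid_field), name, name_len);   // memcpy fallback
 */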

static __inline__ void *ust_buffers_offset_address(struct ust_buffer *buf, size_t offset)
{
        return ((char *)buf->buf_data) + offset;
}

/*
 * Last TSC comparison functions. Check if the current TSC overflows
 * LTT_TSC_BITS bits from the last TSC read. Reads and writes last_tsc
 * atomically.
 */

/* FIXME: does this test work properly? */
#if (BITS_PER_LONG == 32)
static __inline__ void save_last_tsc(struct ust_buffer *ltt_buf,
                                        u64 tsc)
{
        ltt_buf->last_tsc = (unsigned long)(tsc >> LTT_TSC_BITS);
}

static __inline__ int last_tsc_overflow(struct ust_buffer *ltt_buf,
                                        u64 tsc)
{
        unsigned long tsc_shifted = (unsigned long)(tsc >> LTT_TSC_BITS);

        if (unlikely(tsc_shifted - ltt_buf->last_tsc))
                return 1;
        else
                return 0;
}
#else
static __inline__ void save_last_tsc(struct ust_buffer *ltt_buf,
                                        u64 tsc)
{
        ltt_buf->last_tsc = (unsigned long)tsc;
}

static __inline__ int last_tsc_overflow(struct ust_buffer *ltt_buf,
                                        u64 tsc)
{
        if (unlikely((tsc - ltt_buf->last_tsc) >> LTT_TSC_BITS))
                return 1;
        else
                return 0;
}
#endif
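
/*
 * Illustration (not from the original source; the real LTT_TSC_BITS value
 * comes from tracerconst.h): while successive timestamps stay close enough
 * to the previous event that the bits above LTT_TSC_BITS carry no new
 * information, last_tsc_overflow() returns 0 and the compact event header,
 * which only carries the low TSC bits, is sufficient. Otherwise it returns
 * 1 and the caller sets LTT_RFLAG_ID_SIZE_TSC so the event header records
 * the full 64-bit TSC.
 */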

static __inline__ void ltt_reserve_push_reader(
                struct ust_channel *rchan,
                struct ust_buffer *buf,
                long offset)
{
        long consumed_old, consumed_new;

        do {
                consumed_old = uatomic_read(&buf->consumed);
                /*
                 * If buffer is in overwrite mode, push the reader consumed
                 * count if the write position has reached it and we are not
                 * at the first iteration (don't push the reader farther than
                 * the writer). This operation can be done concurrently by many
                 * writers in the same buffer, the writer being at the farthest
                 * write position sub-buffer index in the buffer being the one
                 * which will win this loop.
                 * If the buffer is not in overwrite mode, pushing the reader
                 * only happens if a sub-buffer is corrupted.
                 */
                if (unlikely((SUBBUF_TRUNC(offset, buf->chan)
                              - SUBBUF_TRUNC(consumed_old, buf->chan))
                             >= rchan->alloc_size))
                        consumed_new = SUBBUF_ALIGN(consumed_old, buf->chan);
                else
                        return;
        } while (unlikely(uatomic_cmpxchg(&buf->consumed, consumed_old,
                                          consumed_new) != consumed_old));
}
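
/*
 * Numeric example (illustration only): with 4 sub-buffers of 4096 bytes
 * (alloc_size = 16384), a reader left at consumed = 4096 while the writer
 * reserves at offset = 20500 gives
 *   SUBBUF_TRUNC(20500) - SUBBUF_TRUNC(4096) = 20480 - 4096 = 16384 >= alloc_size,
 * so the cmpxchg loop pushes consumed to SUBBUF_ALIGN(4096) = 8192: the
 * reader is moved to the next sub-buffer boundary and, in overwrite mode,
 * never lags the writer by more than one full buffer.
 */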

static __inline__ void ltt_vmcore_check_deliver(
                struct ust_buffer *buf,
                long commit_count, long idx)
{
        uatomic_set(&buf->commit_seq[idx], commit_count);
}

static __inline__ void ltt_check_deliver(struct ust_channel *chan,
                struct ust_buffer *buf,
                long offset, long commit_count, long idx)
{
        long old_commit_count = commit_count - chan->subbuf_size;

        /* Check if all commits have been done */
        if (unlikely((BUFFER_TRUNC(offset, chan)
                        >> chan->n_subbufs_order)
                        - (old_commit_count
                           & chan->commit_count_mask) == 0)) {
                /*
                 * If we succeeded in updating the cc_sb, we are delivering
                 * the subbuffer. Deals with concurrent updates of the "cc"
                 * value without adding an add_return atomic operation to the
                 * fast path.
                 */
                if (likely(uatomic_cmpxchg(&buf->commit_count[idx].cc_sb,
                                           old_commit_count, commit_count)
                           == old_commit_count)) {
                        int result;

                        /*
                         * Set noref flag for this subbuffer.
                         */
//ust//                 ltt_set_noref_flag(rchan, buf, idx);
                        ltt_vmcore_check_deliver(buf, commit_count, idx);

                        /* wakeup consumer */
                        result = write(buf->data_ready_fd_write, "1", 1);
                        if (result == -1) {
                                PERROR("write (in ltt_relay_buffer_flush)");
                                ERR("this should never happen!");
                        }
                }
        }
}

static __inline__ int ltt_poll_deliver(struct ust_channel *chan, struct ust_buffer *buf)
{
        long consumed_old, consumed_idx, commit_count, write_offset;

        consumed_old = uatomic_read(&buf->consumed);
        consumed_idx = SUBBUF_INDEX(consumed_old, buf->chan);
        commit_count = uatomic_read(&buf->commit_count[consumed_idx].cc_sb);
        /*
         * No memory barrier here, since we are only interested
         * in a statistically correct polling result. The next poll will
         * get the data if we are racing. The mb() that ensures correct
         * memory order is in get_subbuf.
         */
        write_offset = uatomic_read(&buf->offset);

        /*
         * Check that the subbuffer we are trying to consume has been
         * already fully committed.
         */

        if (((commit_count - chan->subbuf_size)
             & chan->commit_count_mask)
            - (BUFFER_TRUNC(consumed_old, buf->chan)
               >> chan->n_subbufs_order)
            != 0)
                return 0;

        /*
         * Check that we are not about to read the same subbuffer in
         * which the writer head is.
         */
        if ((SUBBUF_TRUNC(write_offset, buf->chan)
             - SUBBUF_TRUNC(consumed_old, buf->chan))
            == 0)
                return 0;

        return 1;
}

/*
 * returns 0 if reserve ok, or 1 if the slow path must be taken.
 */
static __inline__ int ltt_relay_try_reserve(
                struct ust_channel *chan,
                struct ust_buffer *buf,
                size_t data_size,
                u64 *tsc, unsigned int *rflags, int largest_align,
                long *o_begin, long *o_end, long *o_old,
                size_t *before_hdr_pad, size_t *size)
{
        *o_begin = uatomic_read(&buf->offset);
        *o_old = *o_begin;

        *tsc = trace_clock_read64();

//ust// #ifdef CONFIG_LTT_VMCORE
//ust//         prefetch(&buf->commit_count[SUBBUF_INDEX(*o_begin, rchan)]);
//ust//         prefetch(&buf->commit_seq[SUBBUF_INDEX(*o_begin, rchan)]);
//ust// #else
//ust//         prefetchw(&buf->commit_count[SUBBUF_INDEX(*o_begin, rchan)]);
//ust// #endif
        if (last_tsc_overflow(buf, *tsc))
                *rflags = LTT_RFLAG_ID_SIZE_TSC;

        if (unlikely(SUBBUF_OFFSET(*o_begin, buf->chan) == 0))
                return 1;

        *size = ust_get_header_size(chan,
                                *o_begin, data_size,
                                before_hdr_pad, *rflags);
        *size += ltt_align(*o_begin + *size, largest_align) + data_size;
        if (unlikely((SUBBUF_OFFSET(*o_begin, buf->chan) + *size)
                     > buf->chan->subbuf_size))
                return 1;

        /*
         * Event fits in the current buffer and we are not on a switch
         * boundary. It's safe to write.
         */
        *o_end = *o_begin + *size;

        if (unlikely((SUBBUF_OFFSET(*o_end, buf->chan)) == 0))
                /*
                 * The offset_end will fall at the very beginning of the next
                 * subbuffer.
                 */
                return 1;

        return 0;
}
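
/*
 * Fast-path sketch (illustration only; the 16-byte event header below is an
 * assumed figure, the real size comes from ust_get_header_size()): with a
 * 4096-byte sub-buffer, o_begin at sub-buffer offset 4000, a 16-byte header
 * and 100 bytes of payload, the slot would end past 4096, so
 * ltt_relay_try_reserve() returns 1 and ltt_reserve_slot() below falls back
 * to ltt_reserve_slot_lockless_slow(), which handles the sub-buffer switch.
 */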

static __inline__ int ltt_reserve_slot(struct ust_channel *chan,
                struct ust_trace *trace, size_t data_size,
                int largest_align, int cpu,
                struct ust_buffer **ret_buf,
                size_t *slot_size, long *buf_offset, u64 *tsc,
                unsigned int *rflags)
{
        struct ust_buffer *buf = *ret_buf = chan->buf[cpu];
        long o_begin, o_end, o_old;
        size_t before_hdr_pad;

        /*
         * Perform retryable operations.
         */
        /* FIXME: make this really per cpu? */
        if (unlikely(LOAD_SHARED(ltt_nesting) > 4)) {
                DBG("Dropping event because nesting is too deep.");
                uatomic_inc(&buf->events_lost);
                return -EPERM;
        }

        if (unlikely(ltt_relay_try_reserve(chan, buf,
                        data_size, tsc, rflags,
                        largest_align, &o_begin, &o_end, &o_old,
                        &before_hdr_pad, slot_size)))
                goto slow_path;

        if (unlikely(uatomic_cmpxchg(&buf->offset, o_old, o_end) != o_old))
                goto slow_path;

        /*
         * Atomically update last_tsc. This update races against concurrent
         * atomic updates, but the race will always cause supplementary full TSC
         * events, never the opposite (missing a full TSC event when it would be
         * needed).
         */
        save_last_tsc(buf, *tsc);

        /*
         * Push the reader if necessary
         */
        ltt_reserve_push_reader(chan, buf, o_end - 1);

        /*
         * Clear noref flag for this subbuffer.
         */
//ust// ltt_clear_noref_flag(chan, buf, SUBBUF_INDEX(o_end - 1, chan));

        *buf_offset = o_begin + before_hdr_pad;
        return 0;

slow_path:
        return ltt_reserve_slot_lockless_slow(chan, trace, data_size,
                        largest_align, cpu, ret_buf,
                        slot_size, buf_offset, tsc,
                        rflags);
}

/*
 * Force a sub-buffer switch for a per-cpu buffer. This operation is
 * completely reentrant: it can be called while tracing is active with
 * absolutely no lock held.
 */
static __inline__ void ltt_force_switch(struct ust_buffer *buf,
                enum force_switch_mode mode)
{
        ltt_force_switch_lockless_slow(buf, mode);
}

/*
 * For flight recording. Must be called after relay_commit.
 * This function increments the subbuffer's commit_seq counter each time the
 * commit count reaches back the reserve offset (modulo subbuffer size). It is
 * useful for crash dump.
 */
//ust// #ifdef CONFIG_LTT_VMCORE
static __inline__ void ltt_write_commit_counter(struct ust_channel *chan,
                struct ust_buffer *buf, long idx, long buf_offset,
                long commit_count, size_t data_size)
{
        long offset;
        long commit_seq_old;

        offset = buf_offset + data_size;

        /*
         * SUBBUF_OFFSET includes commit_count_mask. We can simply
         * compare the offsets within the subbuffer without caring about
         * buffer full/empty mismatch because offset is never zero here
         * (subbuffer header and event headers have non-zero length).
         */
        if (unlikely(SUBBUF_OFFSET(offset - commit_count, buf->chan)))
                return;

        commit_seq_old = uatomic_read(&buf->commit_seq[idx]);
        while (commit_seq_old < commit_count)
                commit_seq_old = uatomic_cmpxchg(&buf->commit_seq[idx],
                                        commit_seq_old, commit_count);

        DBG("commit_seq for channel %s_%d, subbuf %ld is now %ld",
            buf->chan->channel_name, buf->cpu, idx, commit_count);
}
//ust// #else
//ust// static __inline__ void ltt_write_commit_counter(struct ust_buffer *buf,
//ust//                 long idx, long buf_offset, long commit_count, size_t data_size)
//ust// {
//ust// }
//ust// #endif
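
/*
 * Example (illustration only): with subbuf_size = 4096, suppose the slot
 * just committed ends at an offset congruent to 1208 modulo 4096 and the
 * sub-buffer's cumulative commit count is now also congruent to 1208, i.e.
 * every earlier slot in that sub-buffer has been committed. Then
 * SUBBUF_OFFSET(offset - commit_count) == 0 and commit_seq is pushed
 * forward to commit_count. If an earlier slot were still uncommitted, the
 * congruence would fail and commit_seq would keep its old value until that
 * commit arrives, so a crash dump never advertises a partially committed
 * sub-buffer.
 */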

/*
 * Atomic unordered slot commit. Increments the commit count in the
 * specified sub-buffer, and delivers it if necessary.
 *
 * Parameters:
 *
 * @ltt_channel : channel structure
 * @transport_data: transport-specific data
 * @buf_offset : offset following the event header.
 * @data_size : size of the event data.
 * @slot_size : size of the reserved slot.
 */
static __inline__ void ltt_commit_slot(
                struct ust_channel *chan,
                struct ust_buffer *buf, long buf_offset,
                size_t data_size, size_t slot_size)
{
        long offset_end = buf_offset;
        long endidx = SUBBUF_INDEX(offset_end - 1, chan);
        long commit_count;

#ifdef LTT_NO_IPI_BARRIER
        smp_wmb();
#else
        /*
         * Must write slot data before incrementing commit count.
         * This compiler barrier is upgraded into a smp_mb() by the IPI
         * sent by get_subbuf().
         */
        barrier();
#endif
        uatomic_add(&buf->commit_count[endidx].cc, slot_size);
        /*
         * commit count read can race with concurrent OOO commit count updates.
         * This is only needed for ltt_check_deliver (for non-polling delivery
         * only) and for ltt_write_commit_counter. The race can only cause the
         * counter to be read with the same value more than once, which could
         * cause :
         * - Multiple delivery for the same sub-buffer (which is handled
         *   gracefully by the reader code) if the value is for a full
         *   sub-buffer. It's important that we can never miss a sub-buffer
         *   delivery. Re-reading the value after the uatomic_add ensures this.
         * - Reading a commit_count with a higher value than what was actually
         *   added to it for the ltt_write_commit_counter call (again caused by
         *   a concurrent committer). It does not matter, because this function
         *   is interested in the fact that the commit count reaches back the
         *   reserve offset for a specific sub-buffer, which is completely
         *   independent of the order.
         */
        commit_count = uatomic_read(&buf->commit_count[endidx].cc);

        ltt_check_deliver(chan, buf, offset_end - 1, commit_count, endidx);
        /*
         * Update data_size for each commit. It's needed only for extracting
         * ltt buffers from vmcore, after crash.
         */
        ltt_write_commit_counter(chan, buf, endidx, buf_offset, commit_count, data_size);
}

void _ust_buffers_write(struct ust_buffer *buf, size_t offset,
        const void *src, size_t len, ssize_t cpy);

static __inline__ int ust_buffers_write(struct ust_buffer *buf, size_t offset,
        const void *src, size_t len)
{
        size_t cpy;
        size_t buf_offset = BUFFER_OFFSET(offset, buf->chan);

        assert(buf_offset < buf->chan->subbuf_size * buf->chan->subbuf_cnt);

        cpy = min_t(size_t, len, buf->buf_size - buf_offset);
        ust_buffers_do_copy(buf->buf_data + buf_offset, src, cpy);

        if (unlikely(len != cpy))
                _ust_buffers_write(buf, buf_offset, src, len, cpy);
        return len;
}
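
/*
 * End-to-end sketch (illustration only; error handling is simplified, the
 * event-header write is elided because that helper lives outside this file,
 * and chan/trace/data/data_size/largest_align/cpu are hypothetical caller
 * variables). A probe typically reserves a slot, fills it, then commits it:
 *
 *   struct ust_buffer *buf;
 *   size_t slot_size;
 *   long buf_offset;
 *   u64 tsc;
 *   unsigned int rflags = 0;
 *
 *   if (ltt_reserve_slot(chan, trace, data_size, largest_align, cpu,
 *                        &buf, &slot_size, &buf_offset, &tsc, &rflags))
 *           return;  // event dropped (e.g. nesting too deep)
 *
 *   // ... write the event header at buf_offset and advance buf_offset
 *   //     past it ...
 *
 *   ust_buffers_write(buf, buf_offset, data, data_size);
 *   ltt_commit_slot(chan, buf, buf_offset, data_size, slot_size);
 */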

extern int ust_buffers_get_subbuf(struct ust_buffer *buf, long *consumed);
extern int ust_buffers_put_subbuf(struct ust_buffer *buf, unsigned long uconsumed_old);

extern void init_ustrelay_transport(void);

#endif /* _UST_BUFFERS_H */