libust/buffers.h

   1 /*
   2  * buffers.h
   3  *
   4  * Copyright (C) 2009 - Pierre-Marc Fournier (pierre-marc dot fournier at polymtl dot ca)
   5  * Copyright (C) 2008 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
   6  *
   7  */
   8
   9 #ifndef _UST_BUFFERS_H
  10 #define _UST_BUFFERS_H
  11
  12 #include <kcompat/kref.h>
  13 #include <assert.h>
  14 #include "channels.h"
  15 #include "tracerconst.h"
  16 #include "tracercore.h"
  17 #include "header-inline.h"
  18 #include <usterr.h>
  19
  20 /***** FIXME: SHOULD BE REMOVED ***** */
  21
  22 /*
  23  * BUFFER_TRUNC zeroes the subbuffer offset and the subbuffer number parts of
  24  * the offset, which leaves only the buffer number.
  25  */
  26 #define BUFFER_TRUNC(offset, chan) \
  27         ((offset) & (~((chan)->alloc_size-1)))
  28 #define BUFFER_OFFSET(offset, chan) ((offset) & ((chan)->alloc_size - 1))
  29 #define SUBBUF_OFFSET(offset, chan) ((offset) & ((chan)->subbuf_size - 1))
  30 #define SUBBUF_ALIGN(offset, chan) \
  31         (((offset) + (chan)->subbuf_size) & (~((chan)->subbuf_size - 1)))
  32 #define SUBBUF_TRUNC(offset, chan) \
  33         ((offset) & (~((chan)->subbuf_size - 1)))
  34 #define SUBBUF_INDEX(offset, chan) \
  35         (BUFFER_OFFSET((offset), chan) >> (chan)->subbuf_size_order)
  36
/*
 * Version number that tracks changes to the rchan/rchan_buf structs.
 * NOTE(review): those names come from the kernel relay code; in this file
 * the corresponding types are presumably struct ust_channel and
 * struct ust_buffer -- confirm before relying on it.
 */
#define UST_CHANNEL_VERSION             8
  41
  42 /**************************************/
  43
/*
 * Commit accounting for one sub-buffer. struct ust_buffer points to an
 * array of these (commit_count) that is indexed by sub-buffer index.
 */
struct commit_counters {
        long cc;                        /* ATOMIC - ltt_commit_slot() adds the size of every committed slot */
        long cc_sb;                     /* ATOMIC - Incremented _once_ at sb switch (cmpxchg in ltt_check_deliver()) */
};
  48
/*
 * State of one trace buffer of a channel (ltt_reserve_slot() picks it with
 * chan->buf[cpu]). Fields marked ATOMIC are accessed with the uatomic_*()
 * helpers throughout this header. The field order is significant: the
 * hot fields are grouped first and commit_seq must stay last.
 */
struct ust_buffer {
        /* First 32 bytes cache-hot cacheline */
        long offset;                    /* Current write offset in the buffer *atomic*; advanced by cmpxchg when a slot is reserved */
        struct commit_counters *commit_count;   /* Commit count per sub-buffer, indexed by sub-buffer index */
        long consumed;                  /* Current read (consumer) offset in the buffer *atomic* access (shared); writers may push it forward */
        unsigned long last_tsc;         /*
                                         * Last timestamp written in the buffer,
                                         * as stored by save_last_tsc().
                                         */
        /* End of first 32 bytes cacheline */
        long active_readers;    /* ATOMIC - Active readers count standard atomic access (shared) */
        long events_lost;       /* ATOMIC - incremented when an event is dropped (see ltt_reserve_slot()) */
        long corrupted_subbuffers; /* *ATOMIC* */
        /* one byte is written to this pipe when data is available, in order
           to wake the consumer (done in ltt_check_deliver()) */
        /* portability: Single byte writes must be as quick as possible. The kernel-side
           buffer must be large enough so the writer doesn't block. From the pipe(7)
           man page: Since linux 2.6.11, the pipe capacity is 65536 bytes. */
        int data_ready_fd_write;
        /* the reading end of the pipe */
        int data_ready_fd_read;

        unsigned int finalized;
//ust// struct timer_list switch_timer; /* timer for periodical switch */
        unsigned long switch_timer_interval; /* 0 = unset */

        struct ust_channel *chan;       /* channel this buffer belongs to; supplies the size/mask fields used by the offset macros */

        struct kref kref;
        void *buf_data;                 /* base address of the data area (see ust_buffers_offset_address()) */
        size_t buf_size;                /* size of the data area in bytes; bounds the first copy in ust_buffers_write() */
        int shmid;                      /* presumably the shared-memory id backing buf_data -- TODO confirm */
        unsigned int cpu;               /* cpu number, printed as the channel-name suffix in debug output */

        /* commit count per subbuffer; must be at end of struct */
        long commit_seq[0] ____cacheline_aligned; /* ATOMIC */
} ____cacheline_aligned;
  85
/*
 * Reason for forcing a sub-buffer switch. A switch is forced either while
 * tracing is still running, or as the final flush once tracing is over; in
 * the latter case nothing is written into the new sub-buffer.
 * NOTE(review): presumably FORCE_ACTIVE is the "still tracing" case and
 * FORCE_FLUSH the final flush -- confirm against
 * ltt_force_switch_lockless_slow().
 */
enum force_switch_mode { FORCE_ACTIVE, FORCE_FLUSH };
  92
/*
 * Out-of-line slot reservation. ltt_reserve_slot() forwards all of its
 * arguments here when the inline fast path cannot reserve the slot (the
 * event does not fit, a sub-buffer boundary is involved, or the cmpxchg on
 * the write offset lost a race). The return value is passed straight back
 * to the caller of ltt_reserve_slot().
 */
extern int ltt_reserve_slot_lockless_slow(struct ust_trace *trace,
                struct ust_channel *ltt_channel, void **transport_data,
                size_t data_size, size_t *slot_size, long *buf_offset, u64 *tsc,
                unsigned int *rflags, int largest_align, int cpu);

/*
 * Out-of-line implementation of a forced sub-buffer switch;
 * ltt_force_switch() is a thin inline wrapper around it.
 */
extern void ltt_force_switch_lockless_slow(struct ust_buffer *buf,
                enum force_switch_mode mode);
 100
 101
 102 static __inline__ void ust_buffers_do_copy(void *dest, const void *src, size_t len)
 103 {
 104         union {
 105                 const void *src;
 106                 const u8 *src8;
 107                 const u16 *src16;
 108                 const u32 *src32;
 109                 const u64 *src64;
 110         } u = { .src = src };
 111
 112         switch (len) {
 113         case 0: break;
 114         case 1: *(u8 *)dest = *u.src8;
 115                 break;
 116         case 2: *(u16 *)dest = *u.src16;
 117                 break;
 118         case 4: *(u32 *)dest = *u.src32;
 119                 break;
 120         case 8: *(u64 *)dest = *u.src64;
 121                 break;
 122         default:
 123                 memcpy(dest, src, len);
 124         }
 125 }
 126
 127 static __inline__ void *ust_buffers_offset_address(struct ust_buffer *buf, size_t offset)
 128 {
 129         return ((char *)buf->buf_data)+offset;
 130 }
 131
 132 /*
 133  * Last TSC comparison functions. Check if the current TSC overflows
 134  * LTT_TSC_BITS bits from the last TSC read. Reads and writes last_tsc
 135  * atomically.
 136  */
 137
 138 /* FIXME: does this test work properly? */
 139 #if (BITS_PER_LONG == 32)
 140 static __inline__ void save_last_tsc(struct ust_buffer *ltt_buf,
 141                                         u64 tsc)
 142 {
 143         ltt_buf->last_tsc = (unsigned long)(tsc >> LTT_TSC_BITS);
 144 }
 145
 146 static __inline__ int last_tsc_overflow(struct ust_buffer *ltt_buf,
 147                                         u64 tsc)
 148 {
 149         unsigned long tsc_shifted = (unsigned long)(tsc >> LTT_TSC_BITS);
 150
 151         if (unlikely((tsc_shifted - ltt_buf->last_tsc)))
 152                 return 1;
 153         else
 154                 return 0;
 155 }
 156 #else
 157 static __inline__ void save_last_tsc(struct ust_buffer *ltt_buf,
 158                                         u64 tsc)
 159 {
 160         ltt_buf->last_tsc = (unsigned long)tsc;
 161 }
 162
 163 static __inline__ int last_tsc_overflow(struct ust_buffer *ltt_buf,
 164                                         u64 tsc)
 165 {
 166         if (unlikely((tsc - ltt_buf->last_tsc) >> LTT_TSC_BITS))
 167                 return 1;
 168         else
 169                 return 0;
 170 }
 171 #endif
 172
 173 static __inline__ void ltt_reserve_push_reader(
 174                 struct ust_channel *rchan,
 175                 struct ust_buffer *buf,
 176                 long offset)
 177 {
 178         long consumed_old, consumed_new;
 179
 180         do {
 181                 consumed_old = uatomic_read(&buf->consumed);
 182                 /*
 183                  * If buffer is in overwrite mode, push the reader consumed
 184                  * count if the write position has reached it and we are not
 185                  * at the first iteration (don't push the reader farther than
 186                  * the writer). This operation can be done concurrently by many
 187                  * writers in the same buffer, the writer being at the farthest
 188                  * write position sub-buffer index in the buffer being the one
 189                  * which will win this loop.
 190                  * If the buffer is not in overwrite mode, pushing the reader
 191                  * only happens if a sub-buffer is corrupted.
 192                  */
 193                 if (unlikely((SUBBUF_TRUNC(offset, buf->chan)
 194                    - SUBBUF_TRUNC(consumed_old, buf->chan))
 195                    >= rchan->alloc_size))
 196                         consumed_new = SUBBUF_ALIGN(consumed_old, buf->chan);
 197                 else
 198                         return;
 199         } while (unlikely(uatomic_cmpxchg(&buf->consumed, consumed_old,
 200                         consumed_new) != consumed_old));
 201 }
 202
 203 static __inline__ void ltt_vmcore_check_deliver(
 204                 struct ust_buffer *buf,
 205                 long commit_count, long idx)
 206 {
 207         uatomic_set(&buf->commit_seq[idx], commit_count);
 208 }
 209
 210 static __inline__ void ltt_check_deliver(struct ust_channel *chan,
 211                 struct ust_buffer *buf,
 212                 long offset, long commit_count, long idx)
 213 {
 214         long old_commit_count = commit_count - chan->subbuf_size;
 215
 216         /* Check if all commits have been done */
 217         if (unlikely((BUFFER_TRUNC(offset, chan)
 218                         >> chan->n_subbufs_order)
 219                         - (old_commit_count
 220                            & chan->commit_count_mask) == 0)) {
 221                 /*
 222                  * If we succeeded in updating the cc_sb, we are delivering
 223                  * the subbuffer. Deals with concurrent updates of the "cc"
 224                  * value without adding a add_return atomic operation to the
 225                  * fast path.
 226                  */
 227                 if (likely(uatomic_cmpxchg(&buf->commit_count[idx].cc_sb,
 228                                          old_commit_count, commit_count)
 229                            == old_commit_count)) {
 230                         int result;
 231
 232                         /*
 233                          * Set noref flag for this subbuffer.
 234                          */
 235 //ust//                 ltt_set_noref_flag(rchan, buf, idx);
 236                         ltt_vmcore_check_deliver(buf, commit_count, idx);
 237
 238                         /* wakeup consumer */
 239                         result = write(buf->data_ready_fd_write, "1", 1);
 240                         if(result == -1) {
 241                                 PERROR("write (in ltt_relay_buffer_flush)");
 242                                 ERR("this should never happen!");
 243                         }
 244                 }
 245         }
 246 }
 247
 248 static __inline__ int ltt_poll_deliver(struct ust_channel *chan, struct ust_buffer *buf)
 249 {
 250         long consumed_old, consumed_idx, commit_count, write_offset;
 251
 252         consumed_old = uatomic_read(&buf->consumed);
 253         consumed_idx = SUBBUF_INDEX(consumed_old, buf->chan);
 254         commit_count = uatomic_read(&buf->commit_count[consumed_idx].cc_sb);
 255         /*
 256          * No memory barrier here, since we are only interested
 257          * in a statistically correct polling result. The next poll will
 258          * get the data is we are racing. The mb() that ensures correct
 259          * memory order is in get_subbuf.
 260          */
 261         write_offset = uatomic_read(&buf->offset);
 262
 263         /*
 264          * Check that the subbuffer we are trying to consume has been
 265          * already fully committed.
 266          */
 267
 268         if (((commit_count - chan->subbuf_size)
 269              & chan->commit_count_mask)
 270             - (BUFFER_TRUNC(consumed_old, buf->chan)
 271                >> chan->n_subbufs_order)
 272             != 0)
 273                 return 0;
 274
 275         /*
 276          * Check that we are not about to read the same subbuffer in
 277          * which the writer head is.
 278          */
 279         if ((SUBBUF_TRUNC(write_offset, buf->chan)
 280            - SUBBUF_TRUNC(consumed_old, buf->chan))
 281            == 0)
 282                 return 0;
 283
 284         return 1;
 285
 286 }
 287
 288 /*
 289  * returns 0 if reserve ok, or 1 if the slow path must be taken.
 290  */
 291 static __inline__ int ltt_relay_try_reserve(
 292                 struct ust_channel *chan,
 293                 struct ust_buffer *buf,
 294                 size_t data_size,
 295                 u64 *tsc, unsigned int *rflags, int largest_align,
 296                 long *o_begin, long *o_end, long *o_old,
 297                 size_t *before_hdr_pad, size_t *size)
 298 {
 299         *o_begin = uatomic_read(&buf->offset);
 300         *o_old = *o_begin;
 301
 302         *tsc = trace_clock_read64();
 303
 304 //ust// #ifdef CONFIG_LTT_VMCORE
 305 //ust//         prefetch(&buf->commit_count[SUBBUF_INDEX(*o_begin, rchan)]);
 306 //ust//         prefetch(&buf->commit_seq[SUBBUF_INDEX(*o_begin, rchan)]);
 307 //ust// #else
 308 //ust//         prefetchw(&buf->commit_count[SUBBUF_INDEX(*o_begin, rchan)]);
 309 //ust// #endif
 310         if (last_tsc_overflow(buf, *tsc))
 311                 *rflags = LTT_RFLAG_ID_SIZE_TSC;
 312
 313         if (unlikely(SUBBUF_OFFSET(*o_begin, buf->chan) == 0))
 314                 return 1;
 315
 316         *size = ust_get_header_size(chan,
 317                                 *o_begin, data_size,
 318                                 before_hdr_pad, *rflags);
 319         *size += ltt_align(*o_begin + *size, largest_align) + data_size;
 320         if (unlikely((SUBBUF_OFFSET(*o_begin, buf->chan) + *size)
 321                      > buf->chan->subbuf_size))
 322                 return 1;
 323
 324         /*
 325          * Event fits in the current buffer and we are not on a switch
 326          * boundary. It's safe to write.
 327          */
 328         *o_end = *o_begin + *size;
 329
 330         if (unlikely((SUBBUF_OFFSET(*o_end, buf->chan)) == 0))
 331                 /*
 332                  * The offset_end will fall at the very beginning of the next
 333                  * subbuffer.
 334                  */
 335                 return 1;
 336
 337         return 0;
 338 }
 339
 340 static __inline__ int ltt_reserve_slot(struct ust_trace *trace,
 341                 struct ust_channel *chan, void **transport_data,
 342                 size_t data_size, size_t *slot_size, long *buf_offset, u64 *tsc,
 343                 unsigned int *rflags, int largest_align, int cpu)
 344 {
 345         struct ust_buffer *buf = chan->buf[cpu];
 346         long o_begin, o_end, o_old;
 347         size_t before_hdr_pad;
 348
 349         /*
 350          * Perform retryable operations.
 351          */
 352         /* FIXME: make this rellay per cpu? */
 353         if (unlikely(LOAD_SHARED(ltt_nesting) > 4)) {
 354                 DBG("Dropping event because nesting is too deep.");
 355                 uatomic_inc(&buf->events_lost);
 356                 return -EPERM;
 357         }
 358
 359         if (unlikely(ltt_relay_try_reserve(chan, buf,
 360                         data_size, tsc, rflags,
 361                         largest_align, &o_begin, &o_end, &o_old,
 362                         &before_hdr_pad, slot_size)))
 363                 goto slow_path;
 364
 365         if (unlikely(uatomic_cmpxchg(&buf->offset, o_old, o_end) != o_old))
 366                 goto slow_path;
 367
 368         /*
 369          * Atomically update last_tsc. This update races against concurrent
 370          * atomic updates, but the race will always cause supplementary full TSC
 371          * events, never the opposite (missing a full TSC event when it would be
 372          * needed).
 373          */
 374         save_last_tsc(buf, *tsc);
 375
 376         /*
 377          * Push the reader if necessary
 378          */
 379         ltt_reserve_push_reader(chan, buf, o_end - 1);
 380
 381         /*
 382          * Clear noref flag for this subbuffer.
 383          */
 384 //ust// ltt_clear_noref_flag(chan, buf, SUBBUF_INDEX(o_end - 1, chan));
 385
 386         *buf_offset = o_begin + before_hdr_pad;
 387         return 0;
 388 slow_path:
 389         return ltt_reserve_slot_lockless_slow(trace, chan,
 390                 transport_data, data_size, slot_size, buf_offset, tsc,
 391                 rflags, largest_align, cpu);
 392 }
 393
 394 /*
 395  * Force a sub-buffer switch for a per-cpu buffer. This operation is
 396  * completely reentrant : can be called while tracing is active with
 397  * absolutely no lock held.
 398  */
 399 static __inline__ void ltt_force_switch(struct ust_buffer *buf,
 400                 enum force_switch_mode mode)
 401 {
 402         return ltt_force_switch_lockless_slow(buf, mode);
 403 }
 404
 405 /*
 406  * for flight recording. must be called after relay_commit.
 407  * This function increments the subbuffers's commit_seq counter each time the
 408  * commit count reaches back the reserve offset (module subbuffer size). It is
 409  * useful for crash dump.
 410  */
 411 //ust// #ifdef CONFIG_LTT_VMCORE
 412 static __inline__ void ltt_write_commit_counter(struct ust_channel *chan,
 413                 struct ust_buffer *buf, long idx, long buf_offset,
 414                 long commit_count, size_t data_size)
 415 {
 416         long offset;
 417         long commit_seq_old;
 418
 419         offset = buf_offset + data_size;
 420
 421         /*
 422          * SUBBUF_OFFSET includes commit_count_mask. We can simply
 423          * compare the offsets within the subbuffer without caring about
 424          * buffer full/empty mismatch because offset is never zero here
 425          * (subbuffer header and event headers have non-zero length).
 426          */
 427         if (unlikely(SUBBUF_OFFSET(offset - commit_count, buf->chan)))
 428                 return;
 429
 430         commit_seq_old = uatomic_read(&buf->commit_seq[idx]);
 431         while (commit_seq_old < commit_count)
 432                 commit_seq_old = uatomic_cmpxchg(&buf->commit_seq[idx],
 433                                          commit_seq_old, commit_count);
 434
 435         DBG("commit_seq for channel %s_%d, subbuf %ld is now %ld", buf->chan->channel_name, buf->cpu, idx, commit_count);
 436 }
 437 //ust// #else
 438 //ust// static __inline__ void ltt_write_commit_counter(struct ust_buffer *buf,
 439 //ust//                 long idx, long buf_offset, long commit_count, size_t data_size)
 440 //ust// {
 441 //ust// }
 442 //ust// #endif
 443
 444 /*
 445  * Atomic unordered slot commit. Increments the commit count in the
 446  * specified sub-buffer, and delivers it if necessary.
 447  *
 448  * Parameters:
 449  *
 450  * @ltt_channel : channel structure
 451  * @transport_data: transport-specific data
 452  * @buf_offset : offset following the event header.
 453  * @data_size : size of the event data.
 454  * @slot_size : size of the reserved slot.
 455  */
 456 static __inline__ void ltt_commit_slot(
 457                 struct ust_channel *chan,
 458                 struct ust_buffer *buf, long buf_offset,
 459                 size_t data_size, size_t slot_size)
 460 {
 461         long offset_end = buf_offset;
 462         long endidx = SUBBUF_INDEX(offset_end - 1, chan);
 463         long commit_count;
 464
 465 #ifdef LTT_NO_IPI_BARRIER
 466         smp_wmb();
 467 #else
 468         /*
 469          * Must write slot data before incrementing commit count.
 470          * This compiler barrier is upgraded into a smp_mb() by the IPI
 471          * sent by get_subbuf().
 472          */
 473         barrier();
 474 #endif
 475         uatomic_add(&buf->commit_count[endidx].cc, slot_size);
 476         /*
 477          * commit count read can race with concurrent OOO commit count updates.
 478          * This is only needed for ltt_check_deliver (for non-polling delivery
 479          * only) and for ltt_write_commit_counter. The race can only cause the
 480          * counter to be read with the same value more than once, which could
 481          * cause :
 482          * - Multiple delivery for the same sub-buffer (which is handled
 483          *   gracefully by the reader code) if the value is for a full
 484          *   sub-buffer. It's important that we can never miss a sub-buffer
 485          *   delivery. Re-reading the value after the uatomic_add ensures this.
 486          * - Reading a commit_count with a higher value that what was actually
 487          *   added to it for the ltt_write_commit_counter call (again caused by
 488          *   a concurrent committer). It does not matter, because this function
 489          *   is interested in the fact that the commit count reaches back the
 490          *   reserve offset for a specific sub-buffer, which is completely
 491          *   independent of the order.
 492          */
 493         commit_count = uatomic_read(&buf->commit_count[endidx].cc);
 494
 495         ltt_check_deliver(chan, buf, offset_end - 1, commit_count, endidx);
 496         /*
 497          * Update data_size for each commit. It's needed only for extracting
 498          * ltt buffers from vmcore, after crash.
 499          */
 500         ltt_write_commit_counter(chan, buf, endidx, buf_offset, commit_count, data_size);
 501 }
 502
 503 void _ust_buffers_write(struct ust_buffer *buf, size_t offset,
 504         const void *src, size_t len, ssize_t cpy);
 505
 506 static __inline__ int ust_buffers_write(struct ust_buffer *buf, size_t offset,
 507         const void *src, size_t len)
 508 {
 509         size_t cpy;
 510         size_t buf_offset = BUFFER_OFFSET(offset, buf->chan);
 511
 512         assert(buf_offset < buf->chan->subbuf_size*buf->chan->subbuf_cnt);
 513
 514         cpy = min_t(size_t, len, buf->buf_size - buf_offset);
 515         ust_buffers_do_copy(buf->buf_data + buf_offset, src, cpy);
 516
 517         if (unlikely(len != cpy))
 518                 _ust_buffers_write(buf, buf_offset, src, len, cpy);
 519         return len;
 520 }
 521
/*
 * Consumer-side sub-buffer access, implemented outside this header.
 * NOTE(review): judging by the names and by the comments in
 * ltt_poll_deliver() and ltt_commit_slot(), get_subbuf claims the next
 * readable sub-buffer (it holds the memory barrier the writers rely on) and
 * put_subbuf releases it again -- confirm against the implementation.
 */
extern int ust_buffers_get_subbuf(struct ust_buffer *buf, long *consumed);
extern int ust_buffers_put_subbuf(struct ust_buffer *buf, unsigned long uconsumed_old);

/* Initialisation entry point of the ustrelay transport (defined elsewhere). */
extern void init_ustrelay_transport(void);
 526
 527 #endif /* _UST_BUFFERS_H */