/*
 * ltt/ltt-relay-lockless.c
 *
 * (C) Copyright 2005-2008 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
 *
 * LTTng lockless buffer space management (reader/writer).
 *
 * Author:
 *	Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
 *
 * Inspired from LTT:
 *	Karim Yaghmour (karim@opersys.com)
 *	Tom Zanussi (zanussi@us.ibm.com)
 *	Bob Wisniewski (bob@watson.ibm.com)
 * And from K42:
 *	Bob Wisniewski (bob@watson.ibm.com)
 *
 * Changelog:
 *	08/10/08, Cleanup.
 *	19/10/05, Complete lockless mechanism.
 *	27/05/05, Modular redesign and rewrite.
 *
 * Userspace reader semantics:
 *	while (poll fd != POLLHUP) {
 *		- ioctl RELAY_GET_SUBBUF_SIZE
 *		while (1) {
 *			- ioctl GET_SUBBUF
 *			- splice 1 subbuffer worth of data to a pipe
 *			- splice the data from pipe to disk/network
 *			- ioctl PUT_SUBBUF, check error value
 *			  if err val < 0, previous subbuffer was corrupted.
 *		}
 *	}
 *
 * Dual LGPL v2.1/GPL v2 license.
 */
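
/*
 * Concretely, a minimal userspace consumer following the semantics above
 * could look like the sketch below. This is illustrative only: the ioctl
 * request macros (RELAY_GET_SB_SIZE, RELAY_GET_SUBBUF, RELAY_PUT_SUBBUF),
 * the debugfs path, and the poll_not_hup()/out_fd helpers are assumptions
 * standing in for the caller's poll loop and output descriptor; the
 * authoritative names come from the LTT userspace ABI headers.
 *
 *	int fd = open("/mnt/debugfs/ltt/trace/channel_0", O_RDONLY);
 *	int pfd[2];
 *	unsigned long sb_size, consumed;
 *
 *	pipe(pfd);
 *	ioctl(fd, RELAY_GET_SB_SIZE, &sb_size);
 *	while (poll_not_hup(fd)) {
 *		if (ioctl(fd, RELAY_GET_SUBBUF, &consumed) < 0)
 *			continue;	(no sub-buffer ready: poll again)
 *		splice(fd, NULL, pfd[1], NULL, sb_size, SPLICE_F_MOVE);
 *		splice(pfd[0], NULL, out_fd, NULL, sb_size, SPLICE_F_MOVE);
 *		if (ioctl(fd, RELAY_PUT_SUBBUF, &consumed) < 0)
 *			count_corrupted_subbuffer();
 *	}
 */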

#include <linux/time.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/timer.h>
#include <linux/sched.h>
#include <linux/bitops.h>
#include <linux/smp_lock.h>
#include <linux/stat.h>
#include <linux/cpu.h>
#include <linux/idle.h>
#include <linux/delay.h>
#include <linux/notifier.h>
#include <asm/atomic.h>
#include <asm/local.h>

#include "ltt-tracer.h"
#include "ltt-relay.h"
#include "ltt-relay-lockless.h"

#if 0
#define printk_dbg(fmt, args...) printk(fmt, ##args)
#else
#define printk_dbg(fmt, args...)
#endif

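/*
 * Offsets computed by a reserve or sub-buffer switch attempt: begin/end
 * delimit the space being reserved and old is the offset the cmpxchg on
 * buf->offset races against; the *_switch flags record which sub-buffer
 * transitions (populate new, close old, close current) must be performed
 * once the reservation succeeds.
 */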
struct ltt_reserve_switch_offsets {
	long begin, end, old;
	long begin_switch, end_switch_current, end_switch_old;
	size_t before_hdr_pad, size;
};

static
void ltt_force_switch(struct ltt_chanbuf *buf, enum force_switch_mode mode);

static
void ltt_relay_print_buffer_errors(struct ltt_chan *chan, unsigned int cpu);

static const struct file_operations ltt_file_operations;

static
void ltt_buffer_begin(struct ltt_chanbuf *buf, u64 tsc, unsigned int subbuf_idx)
{
	struct ltt_chan *chan = container_of(buf->a.chan, struct ltt_chan, a);
	struct ltt_subbuffer_header *header =
		(struct ltt_subbuffer_header *)
			ltt_relay_offset_address(&buf->a,
				subbuf_idx * chan->a.sb_size);

	header->cycle_count_begin = tsc;
	header->data_size = 0xFFFFFFFF; /* for debugging */
	ltt_write_trace_header(chan->a.trace, header);
}

/*
 * offset is assumed to never be 0 here: never deliver a completely empty
 * subbuffer. The lost size is between 0 and subbuf_size-1.
 */
static
void ltt_buffer_end(struct ltt_chanbuf *buf, u64 tsc, unsigned int offset,
		    unsigned int subbuf_idx)
{
	struct ltt_chan *chan = container_of(buf->a.chan, struct ltt_chan, a);
	struct ltt_subbuffer_header *header =
		(struct ltt_subbuffer_header *)
			ltt_relay_offset_address(&buf->a,
				subbuf_idx * chan->a.sb_size);
	u32 data_size = SUBBUF_OFFSET(offset - 1, chan) + 1;

	header->data_size = data_size;
	header->sb_size = PAGE_ALIGN(data_size);
	header->cycle_count_end = tsc;
	header->events_lost = local_read(&buf->events_lost);
	header->subbuf_corrupt = local_read(&buf->corrupted_subbuffers);
}

/*
 * Must be called under trace lock or cpu hotplug protection.
 */
void ltt_chanbuf_free(struct ltt_chanbuf *buf)
{
	struct ltt_chan *chan = container_of(buf->a.chan, struct ltt_chan, a);

	ltt_relay_print_buffer_errors(chan, buf->a.cpu);
#ifdef CONFIG_LTT_VMCORE
	kfree(buf->commit_seq);
#endif
	kfree(buf->commit_count);

	ltt_chanbuf_alloc_free(&buf->a);
}

/*
 * Must be called under trace lock or cpu hotplug protection.
 */
int ltt_chanbuf_create(struct ltt_chanbuf *buf, struct ltt_chan_alloc *chana,
		       int cpu)
{
	struct ltt_chan *chan = container_of(chana, struct ltt_chan, a);
	struct ltt_trace *trace = chana->trace;
	unsigned int j, n_sb;
	int ret;

	/* Test for cpu hotplug */
	if (buf->a.allocated)
		return 0;

	ret = ltt_chanbuf_alloc_create(&buf->a, &chan->a, cpu);
	if (ret)
		return ret;

	buf->commit_count =
		kzalloc_node(ALIGN(sizeof(*buf->commit_count) * chan->a.n_sb,
				   1 << INTERNODE_CACHE_SHIFT),
			     GFP_KERNEL, cpu_to_node(cpu));
	if (!buf->commit_count) {
		ret = -ENOMEM;
		goto free_chanbuf;
	}

#ifdef CONFIG_LTT_VMCORE
	buf->commit_seq =
		kzalloc_node(ALIGN(sizeof(*buf->commit_seq) * chan->a.n_sb,
				   1 << INTERNODE_CACHE_SHIFT),
			     GFP_KERNEL, cpu_to_node(cpu));
	if (!buf->commit_seq) {
		kfree(buf->commit_count);
		ret = -ENOMEM;
		goto free_commit;
	}
#endif

	local_set(&buf->offset, ltt_sb_header_size());
	atomic_long_set(&buf->consumed, 0);
	atomic_long_set(&buf->active_readers, 0);
	n_sb = chan->a.n_sb;
	for (j = 0; j < n_sb; j++) {
		local_set(&buf->commit_count[j].cc, 0);
		local_set(&buf->commit_count[j].cc_sb, 0);
		local_set(&buf->commit_count[j].events, 0);
	}
	init_waitqueue_head(&buf->write_wait);
	init_waitqueue_head(&buf->read_wait);
	spin_lock_init(&buf->full_lock);

	RCHAN_SB_CLEAR_NOREF(buf->a.buf_wsb[0].pages);
	ltt_buffer_begin(buf, trace->start_tsc, 0);
	/*
	 * Add the sub-buffer header size to the commit count of sub-buffer 0.
	 * The counter may belong to another CPU, but local_add() is safe here
	 * because tracing has not started yet (for this CPU).
	 */
	local_add(ltt_sb_header_size(), &buf->commit_count[0].cc);

	local_set(&buf->events_lost, 0);
	local_set(&buf->corrupted_subbuffers, 0);
	buf->finalized = 0;

	ret = ltt_chanbuf_create_file(chan->a.filename, chan->a.parent,
				      S_IRUSR, buf);
	if (ret)
		goto free_init;

	/*
	 * Ensure the buffer is ready before setting it to allocated.
	 * Used for cpu hotplug vs async wakeup.
	 */
	smp_wmb();
	buf->a.allocated = 1;

	return 0;

	/* Error handling */
free_init:
#ifdef CONFIG_LTT_VMCORE
	kfree(buf->commit_seq);
free_commit:
#endif
	kfree(buf->commit_count);
free_chanbuf:
	ltt_chanbuf_alloc_free(&buf->a);
	return ret;
}

void ltt_chan_remove_files(struct ltt_chan *chan)
{
	ltt_ascii_remove(chan);
	ltt_chan_alloc_remove_files(&chan->a);
}
EXPORT_SYMBOL_GPL(ltt_chan_remove_files);

void ltt_chan_free(struct kref *kref)
{
	struct ltt_chan *chan = container_of(kref, struct ltt_chan, a.kref);

	ltt_chan_alloc_free(&chan->a);
}
EXPORT_SYMBOL_GPL(ltt_chan_free);

/**
 * ltt_chan_create - Create channel.
 */
int ltt_chan_create(const char *base_filename,
		    struct ltt_chan *chan, struct dentry *parent,
		    size_t sb_size, size_t n_sb,
		    int overwrite, struct ltt_trace *trace)
{
	int ret;

	chan->overwrite = overwrite;

	ret = ltt_chan_alloc_init(&chan->a, trace, base_filename, parent,
				  sb_size, n_sb, overwrite, overwrite);
	if (ret)
		goto error;

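	/*
	 * Commit counters are free-running, while the buffer positions they
	 * are compared against are scaled down by n_sb_order (see the
	 * fully-committed test in ltt_chanbuf_get_subbuf()). Masking commit
	 * counts to the low bits keeps that comparison consistent across
	 * counter wrap-around.
	 */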
	chan->commit_count_mask = (~0UL >> chan->a.n_sb_order);

	ret = ltt_ascii_create(chan);
	if (ret)
		goto error_chan_alloc_free;

	return ret;

error_chan_alloc_free:
	ltt_chan_alloc_free(&chan->a);
error:
	return ret;
}
EXPORT_SYMBOL_GPL(ltt_chan_create);

int ltt_chanbuf_open_read(struct ltt_chanbuf *buf)
{
	kref_get(&buf->a.chan->kref);
	if (!atomic_long_add_unless(&buf->active_readers, 1, 1)) {
		kref_put(&buf->a.chan->kref, ltt_chan_free);
		return -EBUSY;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(ltt_chanbuf_open_read);

void ltt_chanbuf_release_read(struct ltt_chanbuf *buf)
{
	WARN_ON(atomic_long_read(&buf->active_readers) != 1);
	atomic_long_dec(&buf->active_readers);
	kref_put(&buf->a.chan->kref, ltt_chan_free);
}
EXPORT_SYMBOL_GPL(ltt_chanbuf_release_read);

/*
 * Wake writers:
 *
 * This must be done after the trace is removed from the RCU list so that there
 * are no stalled writers.
 */
static void ltt_relay_wake_writers(struct ltt_chanbuf *buf)
{
	if (waitqueue_active(&buf->write_wait))
		wake_up_interruptible(&buf->write_wait);
}

/*
 * This function should not be called from NMI interrupt context
 */
static void ltt_buf_unfull(struct ltt_chanbuf *buf)
{
	ltt_relay_wake_writers(buf);
}

/*
 * Promote compiler barrier to a smp_mb().
 * For the specific LTTng case, this IPI call should be removed if the
 * architecture does not reorder writes. This should eventually be provided by
 * a separate architecture-specific infrastructure.
 */
static void remote_mb(void *info)
{
	smp_mb();
}

int ltt_chanbuf_get_subbuf(struct ltt_chanbuf *buf, unsigned long *consumed)
{
	struct ltt_chan *chan = container_of(buf->a.chan, struct ltt_chan, a);
	long consumed_old, consumed_idx, commit_count, write_offset;
	int ret;

	consumed_old = atomic_long_read(&buf->consumed);
	consumed_idx = SUBBUF_INDEX(consumed_old, chan);
	commit_count = local_read(&buf->commit_count[consumed_idx].cc_sb);
	/*
	 * Make sure we read the commit count before reading the buffer
	 * data and the write offset. Correct consumed offset ordering
	 * wrt commit count is ensured by the use of cmpxchg to update
	 * the consumed offset.
	 * smp_call_function_single can fail if the remote CPU is offline,
	 * this is OK because then there is no wmb to execute there.
	 * If our thread is executing on the same CPU as the one the buffer
	 * belongs to, we don't have to synchronize it at all. If we are
	 * migrated, the scheduler will take care of the memory barriers.
	 * Normally, smp_call_function_single() should ensure program order when
	 * executing the remote function, which implies that it surrounds the
	 * function execution with:
	 *	smp_mb()
	 *	send IPI
	 *	csd_lock_wait
	 *			recv IPI
	 *			smp_mb()
	 *			exec. function
	 *			smp_mb()
	 *			csd unlock
	 *	smp_mb()
	 *
	 * However, smp_call_function_single() does not seem to clearly execute
	 * such barriers. It depends on spinlock semantics to provide the barrier
	 * before executing the IPI and, when busy-looping, csd_lock_wait only
	 * executes smp_mb() when it has to wait for the other CPU.
	 *
	 * I don't trust this code. Therefore, let's add the required smp_mb()
	 * sequence ourselves, even if it is duplicated. It has no performance
	 * impact anyway.
	 *
	 * smp_mb() is needed because smp_rmb() and smp_wmb() only order read vs
	 * read and write vs write. They do not ensure core synchronization. We
	 * really have to ensure total order between the 3 barriers running on
	 * the 2 CPUs.
	 */
#ifdef LTT_NO_IPI_BARRIER
	/*
	 * Local rmb to match the remote wmb to read the commit count before the
	 * buffer data and the write offset.
	 */
	smp_rmb();
#else
	if (raw_smp_processor_id() != buf->a.cpu) {
		smp_mb();	/* Total order with IPI handler smp_mb() */
		smp_call_function_single(buf->a.cpu, remote_mb, NULL, 1);
		smp_mb();	/* Total order with IPI handler smp_mb() */
	}
#endif
	write_offset = local_read(&buf->offset);
	/*
	 * Check that the subbuffer we are trying to consume has already been
	 * fully committed: cc_sb of a fully committed sub-buffer leads the
	 * scaled-down consumed position (BUFFER_TRUNC >> n_sb_order) by
	 * exactly one sub-buffer worth of data, and the mask makes the
	 * equality test immune to counter wrap-around.
	 */
	if (((commit_count - chan->a.sb_size)
	     & chan->commit_count_mask)
	    - (BUFFER_TRUNC(consumed_old, chan)
	       >> chan->a.n_sb_order)
	    != 0) {
		return -EAGAIN;
	}
	/*
	 * Check that we are not about to read the same subbuffer in
	 * which the writer head is.
	 */
	if ((SUBBUF_TRUNC(write_offset, chan)
	     - SUBBUF_TRUNC(consumed_old, chan))
	    == 0) {
		return -EAGAIN;
	}

	ret = update_read_sb_index(&buf->a, &chan->a, consumed_idx);
	if (ret)
		return ret;

	*consumed = consumed_old;
	return 0;
}
EXPORT_SYMBOL_GPL(ltt_chanbuf_get_subbuf);

int ltt_chanbuf_put_subbuf(struct ltt_chanbuf *buf, unsigned long consumed)
{
	struct ltt_chan *chan = container_of(buf->a.chan, struct ltt_chan, a);
	long consumed_new, consumed_old;

	WARN_ON(atomic_long_read(&buf->active_readers) != 1);

	consumed_old = consumed;
	consumed_new = SUBBUF_ALIGN(consumed_old, chan);
	WARN_ON_ONCE(RCHAN_SB_IS_NOREF(buf->a.buf_rsb.pages));
	RCHAN_SB_SET_NOREF(buf->a.buf_rsb.pages);

	spin_lock(&buf->full_lock);
	if (atomic_long_cmpxchg(&buf->consumed, consumed_old, consumed_new)
	    != consumed_old) {
		/* We have been pushed by the writer. */
		spin_unlock(&buf->full_lock);
		/*
		 * We exchanged the subbuffer pages. No corruption possible
		 * even if the writer did push us. No more -EIO possible.
		 */
		return 0;
	} else {
		/*
		 * Tell the clients waiting on the buffer that it is no
		 * longer full.
		 */
		ltt_buf_unfull(buf);
		spin_unlock(&buf->full_lock);
	}
	return 0;
}
EXPORT_SYMBOL_GPL(ltt_chanbuf_put_subbuf);

static void switch_buffer(unsigned long data)
{
	struct ltt_chanbuf *buf = (struct ltt_chanbuf *)data;
	struct ltt_chan *chan = container_of(buf->a.chan, struct ltt_chan, a);

	/*
	 * Only flush buffers periodically if readers are active.
	 */
	if (atomic_long_read(&buf->active_readers))
		ltt_force_switch(buf, FORCE_ACTIVE);

	mod_timer_pinned(&buf->switch_timer,
			 jiffies + chan->switch_timer_interval);
}

static void ltt_chanbuf_start_switch_timer(struct ltt_chanbuf *buf)
{
	struct ltt_chan *chan = container_of(buf->a.chan, struct ltt_chan, a);

	if (!chan->switch_timer_interval)
		return;

	init_timer_deferrable(&buf->switch_timer);
	buf->switch_timer.function = switch_buffer;
	buf->switch_timer.expires = jiffies + chan->switch_timer_interval;
	buf->switch_timer.data = (unsigned long)buf;
	add_timer_on(&buf->switch_timer, buf->a.cpu);
}

/*
 * called with ltt traces lock held.
 */
void ltt_chan_start_switch_timer(struct ltt_chan *chan)
{
	int cpu;

	if (!chan->switch_timer_interval)
		return;

	for_each_online_cpu(cpu) {
		struct ltt_chanbuf *buf;

		buf = per_cpu_ptr(chan->a.buf, cpu);
		ltt_chanbuf_start_switch_timer(buf);
	}
}

static void ltt_chanbuf_stop_switch_timer(struct ltt_chanbuf *buf)
{
	struct ltt_chan *chan = container_of(buf->a.chan, struct ltt_chan, a);

	if (!chan->switch_timer_interval)
		return;

	del_timer_sync(&buf->switch_timer);
}

/*
 * called with ltt traces lock held.
 */
void ltt_chan_stop_switch_timer(struct ltt_chan *chan)
{
	int cpu;

	if (!chan->switch_timer_interval)
		return;

	for_each_online_cpu(cpu) {
		struct ltt_chanbuf *buf;

		buf = per_cpu_ptr(chan->a.buf, cpu);
		ltt_chanbuf_stop_switch_timer(buf);
	}
}

static void ltt_chanbuf_idle_switch(struct ltt_chanbuf *buf)
{
	struct ltt_chan *chan = container_of(buf->a.chan, struct ltt_chan, a);

	if (chan->switch_timer_interval)
		ltt_force_switch(buf, FORCE_ACTIVE);
}

/*
 * ltt_chanbuf_switch is called from a remote CPU to ensure that the buffers of
 * a cpu which went down are flushed. Note that if we execute concurrently
 * with trace allocation, a buffer might appear to be unallocated (because it
 * detects that the target CPU is offline).
 */
static void ltt_chanbuf_switch(struct ltt_chanbuf *buf)
{
	if (buf->a.allocated)
		ltt_force_switch(buf, FORCE_ACTIVE);
}

/**
 * ltt_chanbuf_hotcpu_callback - CPU hotplug callback
 * @nb: notifier block
 * @action: hotplug action to take
 * @hcpu: CPU number
 *
 * Returns the success/failure of the operation. (%NOTIFY_OK, %NOTIFY_BAD)
 */
static
int ltt_chanbuf_hotcpu_callback(struct notifier_block *nb,
				unsigned long action,
				void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;

	switch (action) {
	case CPU_DOWN_FAILED:
	case CPU_DOWN_FAILED_FROZEN:
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		/*
		 * CPU hotplug lock protects trace lock from this callback.
		 */
		ltt_chan_for_each_channel(ltt_chanbuf_start_switch_timer, cpu);
		return NOTIFY_OK;

	case CPU_DOWN_PREPARE:
	case CPU_DOWN_PREPARE_FROZEN:
		/*
		 * Performs an IPI to delete the timer locally on the target
		 * CPU. CPU hotplug lock protects trace lock from this
		 * callback.
		 */
		ltt_chan_for_each_channel(ltt_chanbuf_stop_switch_timer, cpu);
		return NOTIFY_OK;

	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		/*
		 * Performing a buffer switch on a remote CPU. Performed by
		 * the CPU responsible for doing the hotunplug after the target
		 * CPU stopped running completely. Ensures that all data
		 * from that remote CPU is flushed. CPU hotplug lock protects
		 * trace lock from this callback.
		 */
		ltt_chan_for_each_channel(ltt_chanbuf_switch, cpu);
		return NOTIFY_OK;

	default:
		return NOTIFY_DONE;
	}
}

static int pm_idle_entry_callback(struct notifier_block *self,
				  unsigned long val, void *data)
{
	if (val == IDLE_START) {
		rcu_read_lock_sched_notrace();
		ltt_chan_for_each_channel(ltt_chanbuf_idle_switch,
					  smp_processor_id());
		rcu_read_unlock_sched_notrace();
	}
	return 0;
}

struct notifier_block pm_idle_entry_notifier = {
	.notifier_call = pm_idle_entry_callback,
	.priority = ~0U, /* smallest prio, run after tracing events */
};

static
void ltt_relay_print_written(struct ltt_chan *chan, long cons_off,
			     unsigned int cpu)
{
	struct ltt_chanbuf *buf = per_cpu_ptr(chan->a.buf, cpu);
	long cons_idx, events_count;

	cons_idx = SUBBUF_INDEX(cons_off, chan);
	events_count = local_read(&buf->commit_count[cons_idx].events);

	if (events_count)
		printk(KERN_INFO
		       "LTT: %lu events written in channel %s "
		       "(cpu %u, index %lu)\n",
		       events_count, chan->a.filename, cpu, cons_idx);
}

static
void ltt_relay_print_subbuffer_errors(struct ltt_chanbuf *buf,
				      struct ltt_chan *chan, long cons_off,
				      unsigned int cpu)
{
	long cons_idx, commit_count, commit_count_sb, write_offset;

	cons_idx = SUBBUF_INDEX(cons_off, chan);
	commit_count = local_read(&buf->commit_count[cons_idx].cc);
	commit_count_sb = local_read(&buf->commit_count[cons_idx].cc_sb);
	/*
	 * No need to order commit_count and write_offset reads because we
	 * execute after trace is stopped when there are no readers left.
	 */
	write_offset = local_read(&buf->offset);
	printk(KERN_WARNING
	       "LTT : unread channel %s offset is %ld "
	       "and cons_off : %ld (cpu %u)\n",
	       chan->a.filename, write_offset, cons_off, cpu);
	/* Check each sub-buffer for an unfilled commit count */
	if (((commit_count - chan->a.sb_size) & chan->commit_count_mask)
	    - (BUFFER_TRUNC(cons_off, chan) >> chan->a.n_sb_order)
	    != 0)
		printk(KERN_ALERT
		       "LTT : %s : subbuffer %lu has an unfilled "
		       "commit count [cc, cc_sb] [%lu,%lu].\n",
		       chan->a.filename, cons_idx, commit_count,
		       commit_count_sb);
	printk(KERN_ALERT "LTT : %s : commit count : %lu, subbuf size %lu\n",
	       chan->a.filename, commit_count, chan->a.sb_size);
}

static
void ltt_relay_print_errors(struct ltt_chanbuf *buf, struct ltt_chan *chan,
			    struct ltt_trace *trace, int cpu)
{
	long cons_off;

	/*
	 * Can be called in the error path of allocation when the channel is
	 * not yet set up.
	 */
	if (!chan)
		return;
	for (cons_off = 0; cons_off < chan->a.buf_size;
	     cons_off = SUBBUF_ALIGN(cons_off, chan))
		ltt_relay_print_written(chan, cons_off, cpu);
	for (cons_off = atomic_long_read(&buf->consumed);
	     (SUBBUF_TRUNC(local_read(&buf->offset), chan)
	      - cons_off) > 0;
	     cons_off = SUBBUF_ALIGN(cons_off, chan))
		ltt_relay_print_subbuffer_errors(buf, chan, cons_off, cpu);
}

static
void ltt_relay_print_buffer_errors(struct ltt_chan *chan, unsigned int cpu)
{
	struct ltt_trace *trace = chan->a.trace;
	struct ltt_chanbuf *buf = per_cpu_ptr(chan->a.buf, cpu);

	if (local_read(&buf->events_lost))
		printk(KERN_ALERT
		       "LTT : %s : %ld events lost "
		       "in %s channel (cpu %u).\n",
		       chan->a.filename, local_read(&buf->events_lost),
		       chan->a.filename, cpu);
	if (local_read(&buf->corrupted_subbuffers))
		printk(KERN_ALERT
		       "LTT : %s : %ld corrupted subbuffers "
		       "in %s channel (cpu %u).\n",
		       chan->a.filename,
		       local_read(&buf->corrupted_subbuffers),
		       chan->a.filename, cpu);

	ltt_relay_print_errors(buf, chan, trace, cpu);
}

static void ltt_relay_remove_dirs(struct ltt_trace *trace)
{
	ltt_ascii_remove_dir(trace);
	debugfs_remove(trace->dentry.trace_root);
}

static int ltt_relay_create_dirs(struct ltt_trace *new_trace)
{
	struct dentry *ltt_root_dentry;
	int ret;

	ltt_root_dentry = get_ltt_root();
	if (!ltt_root_dentry)
		return -ENOENT;

	new_trace->dentry.trace_root = debugfs_create_dir(new_trace->trace_name,
							  ltt_root_dentry);
	put_ltt_root();
	if (new_trace->dentry.trace_root == NULL) {
		printk(KERN_ERR "LTT : Trace directory name %s already taken\n",
		       new_trace->trace_name);
		return -EEXIST;
	}
	ret = ltt_ascii_create_dir(new_trace);
	if (ret)
		printk(KERN_WARNING "LTT : Unable to create ascii output file "
		       "for trace %s\n", new_trace->trace_name);

	return 0;
}

/*
 * LTTng channel flush function.
 *
 * Must be called when no tracing is active in the channel, because of
 * accesses across CPUs.
 */
static notrace void ltt_relay_buffer_flush(struct ltt_chanbuf *buf)
{
	buf->finalized = 1;
	ltt_force_switch(buf, FORCE_FLUSH);
}

static void ltt_relay_async_wakeup_chan(struct ltt_chan *chan)
{
	unsigned int i;

	for_each_possible_cpu(i) {
		struct ltt_chanbuf *buf;

		buf = per_cpu_ptr(chan->a.buf, i);
		if (!buf->a.allocated)
			continue;
		/*
		 * Ensure the buffer has been allocated before reading its
		 * content. Sync cpu hotplug vs async wakeup.
		 */
		smp_rmb();
		if (ltt_poll_deliver(buf, chan))
			wake_up_interruptible(&buf->read_wait);
	}
}

static void ltt_relay_finish_buffer(struct ltt_chan *chan, unsigned int cpu)
{
	struct ltt_chanbuf *buf = per_cpu_ptr(chan->a.buf, cpu);

	if (buf->a.allocated) {
		ltt_relay_buffer_flush(buf);
		ltt_relay_wake_writers(buf);
	}
}

static void ltt_relay_finish_channel(struct ltt_chan *chan)
{
	unsigned int i;

	for_each_possible_cpu(i)
		ltt_relay_finish_buffer(chan, i);
}

/*
 * This is called with preemption disabled when user space has requested
 * blocking mode. If one of the active traces has free space below a
 * specific threshold value, we reenable preemption and block.
 */
static
int ltt_relay_user_blocking(struct ltt_trace *trace, unsigned int chan_index,
			    size_t data_size, struct user_dbg_data *dbg)
{
	struct ltt_chanbuf *buf;
	struct ltt_chan *chan;
	int cpu;
	DECLARE_WAITQUEUE(wait, current);

	chan = &trace->channels[chan_index];
	cpu = smp_processor_id();
	buf = per_cpu_ptr(chan->a.buf, cpu);

	/*
	 * Check if data is too big for the channel: do not
	 * block for it.
	 */
	if (LTT_RESERVE_CRITICAL + data_size > chan->a.sb_size)
		return 0;

	/*
	 * If free space is too low, we block. We restart from the
	 * beginning after we resume (the cpu id may have changed
	 * while preemption was active).
	 */
	spin_lock(&buf->full_lock);
	if (!chan->overwrite) {
		dbg->write = local_read(&buf->offset);
		dbg->read = atomic_long_read(&buf->consumed);
		dbg->avail_size = dbg->write + LTT_RESERVE_CRITICAL + data_size
				  - SUBBUF_TRUNC(dbg->read, chan);
		if (dbg->avail_size > chan->a.buf_size) {
			__set_current_state(TASK_INTERRUPTIBLE);
			add_wait_queue(&buf->write_wait, &wait);
			spin_unlock(&buf->full_lock);
			preempt_enable();
			schedule();
			__set_current_state(TASK_RUNNING);
			remove_wait_queue(&buf->write_wait, &wait);
			if (signal_pending(current))
				return -ERESTARTSYS;
			preempt_disable();
			return 1;
		}
	}
	spin_unlock(&buf->full_lock);
	return 0;
}

static
void ltt_relay_print_user_errors(struct ltt_trace *trace,
				 unsigned int chan_index, size_t data_size,
				 struct user_dbg_data *dbg, int cpu)
{
	struct ltt_chanbuf *buf;
	struct ltt_chan *chan;

	chan = &trace->channels[chan_index];
	buf = per_cpu_ptr(chan->a.buf, cpu);

	printk(KERN_ERR "Error in LTT usertrace : "
	       "buffer full : event lost in blocking "
	       "mode. Increase LTT_RESERVE_CRITICAL.\n");
	printk(KERN_ERR "LTT nesting level is %u.\n",
	       per_cpu(ltt_nesting, cpu));
	printk(KERN_ERR "LTT available size %lu.\n",
	       dbg->avail_size);
	printk(KERN_ERR "available write : %lu, read : %lu\n",
	       dbg->write, dbg->read);

	dbg->write = local_read(&buf->offset);
	dbg->read = atomic_long_read(&buf->consumed);

	printk(KERN_ERR "LTT current size %lu.\n",
	       dbg->write + LTT_RESERVE_CRITICAL + data_size
	       - SUBBUF_TRUNC(dbg->read, chan));
	printk(KERN_ERR "current write : %lu, read : %lu\n",
	       dbg->write, dbg->read);
}

/*
 * ltt_reserve_switch_old_subbuf: switch old subbuffer
 *
 * Concurrency safe because we are the last and only thread to alter this
 * sub-buffer. As long as it is not delivered and read, no other thread can
 * alter the offset, alter the reserve_count or call the
 * client_buffer_end_callback on this sub-buffer.
 *
 * The only remaining threads could be the ones with pending commits. They will
 * have to do the deliver themselves. Not concurrency safe in overwrite mode.
 * We detect corrupted subbuffers with commit and reserve counts. We keep a
 * corrupted sub-buffers count and push the readers across these sub-buffers.
 *
 * Not concurrency safe if a writer is stalled in a subbuffer and another writer
 * switches in, finding out it's corrupted. The result will be that the old
 * (uncommitted) subbuffer will be declared corrupted, and that the new
 * subbuffer will be declared corrupted too because of the commit count
 * adjustment.
 *
 * Note: offset_old should never be 0 here.
 */
static
void ltt_reserve_switch_old_subbuf(struct ltt_chanbuf *buf,
				   struct ltt_chan *chan,
				   struct ltt_reserve_switch_offsets *offsets,
				   u64 *tsc)
{
	long oldidx = SUBBUF_INDEX(offsets->old - 1, chan);
	long commit_count, padding_size;

	padding_size = chan->a.sb_size
		       - (SUBBUF_OFFSET(offsets->old - 1, chan) + 1);
	ltt_buffer_end(buf, *tsc, offsets->old, oldidx);

	/*
	 * Must write slot data before incrementing commit count.
	 * This compiler barrier is upgraded into a smp_wmb() by the IPI
	 * sent by get_subbuf() when it does its smp_rmb().
	 */
	barrier();
	local_add(padding_size, &buf->commit_count[oldidx].cc);
	commit_count = local_read(&buf->commit_count[oldidx].cc);
	ltt_check_deliver(buf, chan, offsets->old - 1, commit_count, oldidx);
	ltt_write_commit_counter(buf, chan, oldidx, offsets->old, commit_count,
				 padding_size);
}

/*
 * ltt_reserve_switch_new_subbuf: Populate new subbuffer.
 *
 * This code can be executed unordered: writers may already have written to the
 * sub-buffer before this code gets executed, caution. The commit makes sure
 * that this code is executed before the deliver of this sub-buffer.
 */
static
void ltt_reserve_switch_new_subbuf(struct ltt_chanbuf *buf,
				   struct ltt_chan *chan,
				   struct ltt_reserve_switch_offsets *offsets,
				   u64 *tsc)
{
	long beginidx = SUBBUF_INDEX(offsets->begin, chan);
	long commit_count;

	ltt_buffer_begin(buf, *tsc, beginidx);

	/*
	 * Must write slot data before incrementing commit count.
	 * This compiler barrier is upgraded into a smp_wmb() by the IPI
	 * sent by get_subbuf() when it does its smp_rmb().
	 */
	barrier();
	local_add(ltt_sb_header_size(), &buf->commit_count[beginidx].cc);
	commit_count = local_read(&buf->commit_count[beginidx].cc);
	/* Check if the written buffer has to be delivered */
	ltt_check_deliver(buf, chan, offsets->begin, commit_count, beginidx);
	ltt_write_commit_counter(buf, chan, beginidx, offsets->begin,
				 commit_count, ltt_sb_header_size());
}

/*
 * ltt_reserve_end_switch_current: finish switching current subbuffer
 *
 * Concurrency safe because we are the last and only thread to alter this
 * sub-buffer. As long as it is not delivered and read, no other thread can
 * alter the offset, alter the reserve_count or call the
 * client_buffer_end_callback on this sub-buffer.
 *
 * The only remaining threads could be the ones with pending commits. They will
 * have to do the deliver themselves. Not concurrency safe in overwrite mode.
 * We detect corrupted subbuffers with commit and reserve counts. We keep a
 * corrupted sub-buffers count and push the readers across these sub-buffers.
 *
 * Not concurrency safe if a writer is stalled in a subbuffer and another writer
 * switches in, finding out it's corrupted. The result will be that the old
 * (uncommitted) subbuffer will be declared corrupted, and that the new
 * subbuffer will be declared corrupted too because of the commit count
 * adjustment.
 */
static
void ltt_reserve_end_switch_current(struct ltt_chanbuf *buf,
				    struct ltt_chan *chan,
				    struct ltt_reserve_switch_offsets *offsets,
				    u64 *tsc)
{
	long endidx = SUBBUF_INDEX(offsets->end - 1, chan);
	long commit_count, padding_size;

	padding_size = chan->a.sb_size
		       - (SUBBUF_OFFSET(offsets->end - 1, chan) + 1);

	ltt_buffer_end(buf, *tsc, offsets->end, endidx);

	/*
	 * Must write slot data before incrementing commit count.
	 * This compiler barrier is upgraded into a smp_wmb() by the IPI
	 * sent by get_subbuf() when it does its smp_rmb().
	 */
	barrier();
	local_add(padding_size, &buf->commit_count[endidx].cc);
	commit_count = local_read(&buf->commit_count[endidx].cc);
	ltt_check_deliver(buf, chan, offsets->end - 1, commit_count, endidx);
	ltt_write_commit_counter(buf, chan, endidx, offsets->end, commit_count,
				 padding_size);
}

/*
 * Returns:
 * 0 if ok
 * !0 if execution must be aborted.
 */
static
int ltt_relay_try_switch_slow(enum force_switch_mode mode,
			      struct ltt_chanbuf *buf, struct ltt_chan *chan,
			      struct ltt_reserve_switch_offsets *offsets,
			      u64 *tsc)
{
	long sb_index;
	long reserve_commit_diff;
	long off;

	offsets->begin = local_read(&buf->offset);
	offsets->old = offsets->begin;
	offsets->begin_switch = 0;
	offsets->end_switch_old = 0;

	*tsc = trace_clock_read64();

	off = SUBBUF_OFFSET(offsets->begin, chan);
	if ((mode != FORCE_ACTIVE && off > 0) || off > ltt_sb_header_size()) {
		offsets->begin = SUBBUF_ALIGN(offsets->begin, chan);
		offsets->end_switch_old = 1;
	} else {
		/* we do not have to switch: buffer is empty */
		return -1;
	}
	if (mode == FORCE_ACTIVE)
		offsets->begin += ltt_sb_header_size();
	/*
	 * Always begin_switch in FORCE_ACTIVE mode.
	 * Test new buffer integrity.
	 */
	sb_index = SUBBUF_INDEX(offsets->begin, chan);
	reserve_commit_diff =
		(BUFFER_TRUNC(offsets->begin, chan)
		 >> chan->a.n_sb_order)
		- (local_read(&buf->commit_count[sb_index].cc_sb)
		   & chan->commit_count_mask);
	if (reserve_commit_diff == 0) {
		/* Next buffer not corrupted. */
		if (mode == FORCE_ACTIVE
		    && !chan->overwrite
		    && offsets->begin - atomic_long_read(&buf->consumed)
		       >= chan->a.buf_size) {
			/*
			 * We do not overwrite non-consumed buffers and we are
			 * full: ignore switch while tracing is active.
			 */
			return -1;
		}
	} else {
		/*
		 * Next subbuffer corrupted. Force pushing reader even in
		 * normal mode.
		 */
	}
	offsets->end = offsets->begin;
	return 0;
}

/*
 * Force a sub-buffer switch for a per-cpu buffer. This operation is
 * completely reentrant: it can be called while tracing is active with
 * absolutely no lock held.
 *
 * Note, however, that as a local_cmpxchg is used for some atomic
 * operations, this function must be called from the CPU which owns the buffer
 * for an ACTIVE flush.
 */
void ltt_force_switch_lockless_slow(struct ltt_chanbuf *buf,
				    enum force_switch_mode mode)
{
	struct ltt_chan *chan = container_of(buf->a.chan, struct ltt_chan, a);
	struct ltt_reserve_switch_offsets offsets;
	u64 tsc;

	offsets.size = 0;

	/*
	 * Perform retryable operations.
	 */
	do {
		if (ltt_relay_try_switch_slow(mode, buf, chan, &offsets, &tsc))
			return;
	} while (local_cmpxchg(&buf->offset, offsets.old, offsets.end)
		 != offsets.old);

	/*
	 * Atomically update last_tsc. This update races against concurrent
	 * atomic updates, but the race will always cause supplementary full TSC
	 * events, never the opposite (missing a full TSC event when it would be
	 * needed).
	 */
	save_last_tsc(buf, tsc);

	/*
	 * Push the reader if necessary.
	 */
	if (mode == FORCE_ACTIVE) {
		ltt_reserve_push_reader(buf, chan, offsets.end - 1);
		ltt_clear_noref_flag(&buf->a, SUBBUF_INDEX(offsets.end - 1,
							   chan));
	}

	/*
	 * Switch old subbuffer if needed.
	 */
	if (offsets.end_switch_old) {
		ltt_clear_noref_flag(&buf->a, SUBBUF_INDEX(offsets.old - 1,
							   chan));
		ltt_reserve_switch_old_subbuf(buf, chan, &offsets, &tsc);
	}

	/*
	 * Populate new subbuffer.
	 */
	if (mode == FORCE_ACTIVE)
		ltt_reserve_switch_new_subbuf(buf, chan, &offsets, &tsc);
}
EXPORT_SYMBOL_GPL(ltt_force_switch_lockless_slow);

/*
 * Returns:
 * 0 if ok
 * !0 if execution must be aborted.
 */
static
int ltt_relay_try_reserve_slow(struct ltt_chanbuf *buf, struct ltt_chan *chan,
			       struct ltt_reserve_switch_offsets *offsets,
			       size_t data_size, u64 *tsc, unsigned int *rflags,
			       int largest_align)
{
	long reserve_commit_diff;

	offsets->begin = local_read(&buf->offset);
	offsets->old = offsets->begin;
	offsets->begin_switch = 0;
	offsets->end_switch_current = 0;
	offsets->end_switch_old = 0;

	*tsc = trace_clock_read64();
	if (last_tsc_overflow(buf, *tsc))
		*rflags = LTT_RFLAG_ID_SIZE_TSC;

	if (unlikely(SUBBUF_OFFSET(offsets->begin, chan) == 0)) {
		offsets->begin_switch = 1;	/* For offsets->begin */
	} else {
		offsets->size = ltt_get_header_size(chan, offsets->begin,
						    data_size,
						    &offsets->before_hdr_pad,
						    *rflags);
		offsets->size += ltt_align(offsets->begin + offsets->size,
					   largest_align)
				 + data_size;
		if (unlikely((SUBBUF_OFFSET(offsets->begin, chan) +
			      offsets->size) > chan->a.sb_size)) {
			offsets->end_switch_old = 1;	/* For offsets->old */
			offsets->begin_switch = 1;	/* For offsets->begin */
		}
	}
	if (unlikely(offsets->begin_switch)) {
		long sb_index;

		/*
		 * We are typically not filling the previous buffer completely.
		 */
		if (likely(offsets->end_switch_old))
			offsets->begin = SUBBUF_ALIGN(offsets->begin, chan);
		offsets->begin = offsets->begin + ltt_sb_header_size();
		/* Test new buffer integrity */
		sb_index = SUBBUF_INDEX(offsets->begin, chan);
		reserve_commit_diff =
			(BUFFER_TRUNC(offsets->begin, chan)
			 >> chan->a.n_sb_order)
			- (local_read(&buf->commit_count[sb_index].cc_sb)
			   & chan->commit_count_mask);
		if (likely(reserve_commit_diff == 0)) {
			/* Next buffer not corrupted. */
			if (unlikely(!chan->overwrite &&
				     (SUBBUF_TRUNC(offsets->begin, chan)
				      - SUBBUF_TRUNC(atomic_long_read(&buf->consumed),
						     chan))
				     >= chan->a.buf_size)) {
				/*
				 * We do not overwrite non-consumed buffers
				 * and we are full: the event is lost.
				 */
				local_inc(&buf->events_lost);
				return -1;
			} else {
				/*
				 * Next buffer not corrupted, we are either in
				 * overwrite mode or the buffer is not full.
				 * It's safe to write in this new subbuffer.
				 */
			}
		} else {
			/*
			 * Next subbuffer corrupted. Drop event in normal and
			 * overwrite mode. Caused by either a writer OOPS or
			 * too many nested writes over a reserve/commit pair.
			 */
			local_inc(&buf->events_lost);
			return -1;
		}
		offsets->size = ltt_get_header_size(chan, offsets->begin,
						    data_size,
						    &offsets->before_hdr_pad,
						    *rflags);
		offsets->size += ltt_align(offsets->begin + offsets->size,
					   largest_align)
				 + data_size;
		if (unlikely((SUBBUF_OFFSET(offsets->begin, chan)
			      + offsets->size) > chan->a.sb_size)) {
			/*
			 * Event too big for subbuffers, report error, don't
			 * complete the sub-buffer switch.
			 */
			local_inc(&buf->events_lost);
			return -1;
		} else {
			/*
			 * We just made a successful buffer switch and the event
			 * fits in the new subbuffer. Let's write.
			 */
		}
	} else {
		/*
		 * Event fits in the current buffer and we are not on a switch
		 * boundary. It's safe to write.
		 */
	}
	offsets->end = offsets->begin + offsets->size;

	if (unlikely((SUBBUF_OFFSET(offsets->end, chan)) == 0)) {
		/*
		 * The offset_end will fall at the very beginning of the next
		 * subbuffer.
		 */
		offsets->end_switch_current = 1;	/* For offsets->begin */
	}
	return 0;
}

/**
 * ltt_reserve_slot_lockless_slow - Atomic slot reservation in a buffer.
 * @chan: channel structure
 * @trace: the trace structure to log to.
 * @data_size: size of the variable length data to log.
 * @largest_align: largest alignment of the data to be written.
 * @cpu: cpuid
 * @ret_buf: pointer to the per-cpu buffer used for the reservation (out)
 * @slot_size: pointer to total size of the slot (out)
 * @buf_offset: pointer to reserved buffer offset (out)
 * @tsc: pointer to the tsc at the slot reservation (out)
 * @rflags: pointer to the reservation flags (out)
 *
 * Return : -ENOSPC if not enough space, else returns 0.
 * It will take care of sub-buffer switching.
 */
int ltt_reserve_slot_lockless_slow(struct ltt_chan *chan,
				   struct ltt_trace *trace, size_t data_size,
				   int largest_align, int cpu,
				   struct ltt_chanbuf **ret_buf,
				   size_t *slot_size, long *buf_offset,
				   u64 *tsc, unsigned int *rflags)
{
	struct ltt_chanbuf *buf = *ret_buf = per_cpu_ptr(chan->a.buf, cpu);
	struct ltt_reserve_switch_offsets offsets;

	offsets.size = 0;

	do {
		if (unlikely(ltt_relay_try_reserve_slow(buf, chan, &offsets,
							data_size, tsc, rflags,
							largest_align)))
			return -ENOSPC;
	} while (unlikely(local_cmpxchg(&buf->offset, offsets.old, offsets.end)
			  != offsets.old));

	/*
	 * Atomically update last_tsc. This update races against concurrent
	 * atomic updates, but the race will always cause supplementary full TSC
	 * events, never the opposite (missing a full TSC event when it would be
	 * needed).
	 */
	save_last_tsc(buf, *tsc);

	/*
	 * Push the reader if necessary.
	 */
	ltt_reserve_push_reader(buf, chan, offsets.end - 1);

	/*
	 * Clear noref flag for this subbuffer.
	 */
	ltt_clear_noref_flag(&buf->a, SUBBUF_INDEX(offsets.end - 1, chan));

	/*
	 * Switch old subbuffer if needed.
	 */
	if (unlikely(offsets.end_switch_old)) {
		ltt_clear_noref_flag(&buf->a, SUBBUF_INDEX(offsets.old - 1,
							   chan));
		ltt_reserve_switch_old_subbuf(buf, chan, &offsets, tsc);
	}

	/*
	 * Populate new subbuffer.
	 */
	if (unlikely(offsets.begin_switch))
		ltt_reserve_switch_new_subbuf(buf, chan, &offsets, tsc);

	if (unlikely(offsets.end_switch_current))
		ltt_reserve_end_switch_current(buf, chan, &offsets, tsc);

	*slot_size = offsets.size;
	*buf_offset = offsets.begin + offsets.before_hdr_pad;
	return 0;
}
EXPORT_SYMBOL_GPL(ltt_reserve_slot_lockless_slow);
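
/*
 * Writer-side usage sketch. A probe on the slow path typically reserves,
 * writes and commits as below, with preemption disabled. Illustrative
 * only: the payload layout is made up, and ltt_commit_slot() with this
 * signature is assumed from the companion ltt-relay-lockless.h header.
 *
 *	ret = ltt_reserve_slot_lockless_slow(chan, trace, sizeof(payload),
 *					     sizeof(long), cpu, &buf,
 *					     &slot_size, &buf_offset,
 *					     &tsc, &rflags);
 *	if (!ret) {
 *		(write the event header and payload at buf_offset)
 *		ltt_commit_slot(buf, chan, buf_offset, sizeof(payload),
 *				slot_size);
 *	}
 */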

static struct ltt_transport ltt_relay_transport = {
	.name = "relay",
	.owner = THIS_MODULE,
	.ops = {
		.create_dirs = ltt_relay_create_dirs,
		.remove_dirs = ltt_relay_remove_dirs,
		.create_channel = ltt_chan_create,
		.finish_channel = ltt_relay_finish_channel,
		.remove_channel = ltt_chan_free,
		.remove_channel_files = ltt_chan_remove_files,
		.wakeup_channel = ltt_relay_async_wakeup_chan,
		.user_blocking = ltt_relay_user_blocking,
		.user_errors = ltt_relay_print_user_errors,
		.start_switch_timer = ltt_chan_start_switch_timer,
		.stop_switch_timer = ltt_chan_stop_switch_timer,
	},
};

static struct notifier_block fn_ltt_chanbuf_hotcpu_callback = {
	.notifier_call = ltt_chanbuf_hotcpu_callback,
	.priority = 6,
};

int __init ltt_relay_init(void)
{
	printk(KERN_INFO "LTT : ltt-relay init\n");

	ltt_transport_register(&ltt_relay_transport);
	register_cpu_notifier(&fn_ltt_chanbuf_hotcpu_callback);
	register_idle_notifier(&pm_idle_entry_notifier);

	return 0;
}

void __exit ltt_relay_exit(void)
{
	printk(KERN_INFO "LTT : ltt-relay exit\n");

	unregister_idle_notifier(&pm_idle_entry_notifier);
	unregister_cpu_notifier(&fn_ltt_chanbuf_hotcpu_callback);
	ltt_transport_unregister(&ltt_relay_transport);
}

MODULE_LICENSE("GPL and additional rights");
MODULE_AUTHOR("Mathieu Desnoyers");
MODULE_DESCRIPTION("Linux Trace Toolkit Next Generation Lockless Relay");