Pull trace event headers into lttng modules tree
author Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Wed, 11 May 2011 14:56:53 +0000 (10:56 -0400)
committer Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Wed, 11 May 2011 14:56:53 +0000 (10:56 -0400)
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
19 files changed:
instrumentation/events/README [new file with mode: 0644]
instrumentation/events/lttng-module/block.h [new file with mode: 0644]
instrumentation/events/lttng-module/irq.h [new file with mode: 0644]
instrumentation/events/lttng-module/kvm.h [new file with mode: 0644]
instrumentation/events/lttng-module/sched.h [new file with mode: 0644]
instrumentation/events/lttng-patch/block.h [new file with mode: 0644]
instrumentation/events/lttng-patch/irq.h [new file with mode: 0644]
instrumentation/events/lttng-patch/kvm.h [new file with mode: 0644]
instrumentation/events/lttng-patch/sched.h [new file with mode: 0644]
instrumentation/events/mainline/block.h [new file with mode: 0644]
instrumentation/events/mainline/irq.h [new file with mode: 0644]
instrumentation/events/mainline/kvm.h [new file with mode: 0644]
instrumentation/events/mainline/sched.h [new file with mode: 0644]
probes/define_trace.h [new file with mode: 0644]
probes/lttng-events.h
probes/lttng-probe-block.c
probes/lttng-probe-irq.c
probes/lttng-probe-kvm.c
probes/lttng-probe-sched.c

diff --git a/instrumentation/events/README b/instrumentation/events/README
new file mode 100644 (file)
index 0000000..e972b5c
--- /dev/null
@@ -0,0 +1,11 @@
+The workflow for updating patches from a newer kernel:
+
+Diff mainline/ and lttng-patch/ directories. (diff 1)
+Diff lttng-patch/ and lttng-module/ directories. (diff 2)
+
+Pull the new headers from mainline kernel to mainline/.
+Copy them into lttng-patch.
+Apply diff (1). Fix conflicts.
+
+Copy the resulting files into lttng-module/.
+Apply diff (2). Fix conflicts.
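
As an illustration only (not part of this commit), the README workflow above could be scripted roughly as follows. This is a hedged Python sketch: the diff1.patch/diff2.patch file names, the use of the external diff(1)/patch(1) tools, and running from the top of the lttng-modules tree are all assumptions, and conflict resolution still has to be done by hand.

#!/usr/bin/env python3
# Hypothetical helper illustrating the README workflow; assumes GNU diff/patch
# are installed and that the script runs from the top of the lttng-modules tree.
import shutil
import subprocess
from pathlib import Path

EVENTS = Path("instrumentation/events")

def record_diff(old: str, new: str, out: str) -> None:
    # diff exits with status 1 when the trees differ, so don't use check=True.
    result = subprocess.run(["diff", "-urN", old, new],
                            cwd=EVENTS, capture_output=True, text=True)
    Path(out).write_text(result.stdout)

def copy_headers(src: str, dst: str) -> None:
    for header in (EVENTS / src).glob("*.h"):
        shutil.copy(header, EVENTS / dst / header.name)

def apply_patch(dst: str, patchfile: str) -> None:
    # -p1 strips the leading directory component; rejects (*.rej files)
    # mark the conflicts that must be fixed by hand.
    with open(patchfile) as f:
        subprocess.run(["patch", "-p1", "-d", str(EVENTS / dst)],
                       stdin=f, check=False)

# Diff (1) and diff (2) from the README, recorded before updating anything.
record_diff("mainline", "lttng-patch", "diff1.patch")
record_diff("lttng-patch", "lttng-module", "diff2.patch")

# After pulling the new headers from the mainline kernel into mainline/:
copy_headers("mainline", "lttng-patch")
apply_patch("lttng-patch", "diff1.patch")

copy_headers("lttng-patch", "lttng-module")
apply_patch("lttng-module", "diff2.patch")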
diff --git a/instrumentation/events/lttng-module/block.h b/instrumentation/events/lttng-module/block.h
new file mode 100644 (file)
index 0000000..3c75e4f
--- /dev/null
@@ -0,0 +1,620 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM block
+
+#if !defined(_TRACE_BLOCK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_BLOCK_H
+
+#include <linux/blktrace_api.h>
+#include <linux/blkdev.h>
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#ifndef _TRACE_BLOCK_DEF_
+#define _TRACE_BLOCK_DEF_
+
+#define __blk_dump_cmd(cmd, len)       "<unknown>"
+
+enum {
+       RWBS_FLAG_WRITE         = (1 << 0),
+       RWBS_FLAG_DISCARD       = (1 << 1),
+       RWBS_FLAG_READ          = (1 << 2),
+       RWBS_FLAG_RAHEAD        = (1 << 3),
+       RWBS_FLAG_SYNC          = (1 << 4),
+       RWBS_FLAG_META          = (1 << 5),
+       RWBS_FLAG_SECURE        = (1 << 6),
+};
+
+#endif /* _TRACE_BLOCK_DEF_ */
+
+#define __print_rwbs_flags(rwbs)               \
+       __print_flags(rwbs, "",                 \
+               { RWBS_FLAG_WRITE, "W" },       \
+               { RWBS_FLAG_DISCARD, "D" },     \
+               { RWBS_FLAG_READ, "R" },        \
+               { RWBS_FLAG_RAHEAD, "A" },      \
+               { RWBS_FLAG_SYNC, "S" },        \
+               { RWBS_FLAG_META, "M" },        \
+               { RWBS_FLAG_SECURE, "E" })
+
+#define blk_fill_rwbs(rwbs, rw, bytes)                                       \
+               tp_assign(rwbs, ((rw) & WRITE ? RWBS_FLAG_WRITE :             \
+                       ( (rw) & REQ_DISCARD ? RWBS_FLAG_DISCARD :            \
+                       ( (bytes) ? RWBS_FLAG_READ :                          \
+                       ( 0 ))))                                              \
+                       | ((rw) & REQ_RAHEAD ? RWBS_FLAG_RAHEAD : 0)          \
+                       | ((rw) & REQ_SYNC ? RWBS_FLAG_SYNC : 0)              \
+                       | ((rw) & REQ_META ? RWBS_FLAG_META : 0)              \
+                       | ((rw) & REQ_SECURE ? RWBS_FLAG_SECURE : 0))
+
+DECLARE_EVENT_CLASS(block_rq_with_error,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq),
+
+       TP_STRUCT__entry(
+               __field(  dev_t,        dev                     )
+               __field(  sector_t,     sector                  )
+               __field(  unsigned int, nr_sector               )
+               __field(  int,          errors                  )
+               __field(  unsigned int, rwbs                    )
+               __dynamic_array( char,  cmd,    blk_cmd_buf_len(rq)     )
+       ),
+
+       TP_fast_assign(
+               tp_assign(dev, rq->rq_disk ? disk_devt(rq->rq_disk) : 0)
+               tp_assign(sector, (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                                       0 : blk_rq_pos(rq))
+               tp_assign(nr_sector, (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                                       0 : blk_rq_sectors(rq))
+               tp_assign(errors, rq->errors)
+               blk_fill_rwbs(rwbs, rq->cmd_flags, blk_rq_bytes(rq))
+               tp_memcpy_dyn(cmd, rq->cmd, blk_cmd_buf_len(rq))
+       ),
+
+       TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __print_rwbs_flags(__entry->rwbs),
+                 __blk_dump_cmd(__get_dynamic_array(cmd),
+                                __get_dynamic_array_len(cmd)),
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->errors)
+)
+
+/**
+ * block_rq_abort - abort block operation request
+ * @q: queue containing the block operation request
+ * @rq: block IO operation request
+ *
+ * Called immediately after pending block IO operation request @rq in
+ * queue @q is aborted. The fields in the operation request @rq
+ * can be examined to determine which device and sectors the pending
+ * operation would access.
+ */
+DEFINE_EVENT(block_rq_with_error, block_rq_abort,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq)
+)
+
+/**
+ * block_rq_requeue - place block IO request back on a queue
+ * @q: queue holding operation
+ * @rq: block IO operation request
+ *
+ * The block operation request @rq is being placed back into queue
+ * @q.  For some reason the request was not completed and needs to be
+ * put back in the queue.
+ */
+DEFINE_EVENT(block_rq_with_error, block_rq_requeue,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq)
+)
+
+/**
+ * block_rq_complete - block IO operation completed by device driver
+ * @q: queue containing the block operation request
+ * @rq: block operations request
+ *
+ * The block_rq_complete tracepoint event indicates that some portion
+ * of the operation request has been completed by the device driver.  If
+ * the @rq->bio is %NULL, then there is absolutely no additional work to
+ * do for the request. If @rq->bio is non-NULL then there is
+ * additional work required to complete the request.
+ */
+DEFINE_EVENT(block_rq_with_error, block_rq_complete,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq)
+)
+
+DECLARE_EVENT_CLASS(block_rq,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq),
+
+       TP_STRUCT__entry(
+               __field(  dev_t,        dev                     )
+               __field(  sector_t,     sector                  )
+               __field(  unsigned int, nr_sector               )
+               __field(  unsigned int, bytes                   )
+               __field(  unsigned int, rwbs                    )
+               __array(  char,         comm,   TASK_COMM_LEN   )
+               __dynamic_array( char,  cmd,    blk_cmd_buf_len(rq)     )
+       ),
+
+       TP_fast_assign(
+               tp_assign(dev, rq->rq_disk ? disk_devt(rq->rq_disk) : 0)
+               tp_assign(sector, (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                                       0 : blk_rq_pos(rq))
+               tp_assign(nr_sector, (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                                       0 : blk_rq_sectors(rq))
+               tp_assign(bytes, (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                                       blk_rq_bytes(rq) : 0)
+               blk_fill_rwbs(rwbs, rq->cmd_flags, blk_rq_bytes(rq))
+               tp_memcpy_dyn(cmd, rq->cmd, blk_cmd_buf_len(rq))
+               tp_memcpy(comm, current->comm, TASK_COMM_LEN)
+       ),
+
+       TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __print_rwbs_flags(__entry->rwbs),
+                 __entry->bytes,
+                 __blk_dump_cmd(__get_dynamic_array(cmd),
+                                __get_dynamic_array_len(cmd)),
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->comm)
+)
+
+/**
+ * block_rq_insert - insert block operation request into queue
+ * @q: target queue
+ * @rq: block IO operation request
+ *
+ * Called immediately before block operation request @rq is inserted
+ * into queue @q.  The fields in the operation request @rq struct can
+ * be examined to determine which device and sectors the pending
+ * operation would access.
+ */
+DEFINE_EVENT(block_rq, block_rq_insert,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq)
+)
+
+/**
+ * block_rq_issue - issue pending block IO request operation to device driver
+ * @q: queue holding operation
+ * @rq: block IO operation request
+ *
+ * Called when block operation request @rq from queue @q is sent to a
+ * device driver for processing.
+ */
+DEFINE_EVENT(block_rq, block_rq_issue,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq)
+)
+
+/**
+ * block_bio_bounce - used bounce buffer when processing block operation
+ * @q: queue holding the block operation
+ * @bio: block operation
+ *
+ * A bounce buffer was used to handle the block operation @bio in @q.
+ * This occurs when hardware limitations prevent a direct transfer of
+ * data between the @bio data memory area and the IO device.  Use of a
+ * bounce buffer requires extra copying of data and decreases
+ * performance.
+ */
+TRACE_EVENT(block_bio_bounce,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio),
+
+       TP_ARGS(q, bio),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev                     )
+               __field( sector_t,      sector                  )
+               __field( unsigned int,  nr_sector               )
+               __field( unsigned int,  rwbs                    )
+               __array( char,          comm,   TASK_COMM_LEN   )
+       ),
+
+       TP_fast_assign(
+               tp_assign(dev, bio->bi_bdev ?
+                                         bio->bi_bdev->bd_dev : 0)
+               tp_assign(sector, bio->bi_sector)
+               tp_assign(nr_sector, bio->bi_size >> 9)
+               blk_fill_rwbs(rwbs, bio->bi_rw, bio->bi_size)
+               tp_memcpy(comm, current->comm, TASK_COMM_LEN)
+       ),
+
+       TP_printk("%d,%d %s %llu + %u [%s]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __print_rwbs_flags(__entry->rwbs),
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->comm)
+)
+
+/**
+ * block_bio_complete - completed all work on the block operation
+ * @q: queue holding the block operation
+ * @bio: block operation completed
+ * @error: io error value
+ *
+ * This tracepoint indicates there is no further work to do on this
+ * block IO operation @bio.
+ */
+TRACE_EVENT(block_bio_complete,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio, int error),
+
+       TP_ARGS(q, bio, error),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev             )
+               __field( sector_t,      sector          )
+               __field( unsigned,      nr_sector       )
+               __field( int,           error           )
+               __field( unsigned int,  rwbs            )
+       ),
+
+       TP_fast_assign(
+               tp_assign(dev, bio->bi_bdev->bd_dev)
+               tp_assign(sector, bio->bi_sector)
+               tp_assign(nr_sector, bio->bi_size >> 9)
+               tp_assign(error, error)
+               blk_fill_rwbs(rwbs, bio->bi_rw, bio->bi_size)
+       ),
+
+       TP_printk("%d,%d %s %llu + %u [%d]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __print_rwbs_flags(__entry->rwbs),
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->error)
+)
+
+DECLARE_EVENT_CLASS(block_bio,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio),
+
+       TP_ARGS(q, bio),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev                     )
+               __field( sector_t,      sector                  )
+               __field( unsigned int,  nr_sector               )
+               __field( unsigned int,  rwbs                    )
+               __array( char,          comm,   TASK_COMM_LEN   )
+       ),
+
+       TP_fast_assign(
+               tp_assign(dev, bio->bi_bdev->bd_dev)
+               tp_assign(sector, bio->bi_sector)
+               tp_assign(nr_sector, bio->bi_size >> 9)
+               blk_fill_rwbs(rwbs, bio->bi_rw, bio->bi_size)
+               tp_memcpy(comm, current->comm, TASK_COMM_LEN)
+       ),
+
+       TP_printk("%d,%d %s %llu + %u [%s]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __print_rwbs_flags(__entry->rwbs),
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->comm)
+)
+
+/**
+ * block_bio_backmerge - merging block operation to the end of an existing operation
+ * @q: queue holding operation
+ * @bio: new block operation to merge
+ *
+ * Merging block request @bio to the end of an existing block request
+ * in queue @q.
+ */
+DEFINE_EVENT(block_bio, block_bio_backmerge,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio),
+
+       TP_ARGS(q, bio)
+)
+
+/**
+ * block_bio_frontmerge - merging block operation to the beginning of an existing operation
+ * @q: queue holding operation
+ * @bio: new block operation to merge
+ *
+ * Merging block IO operation @bio to the beginning of an existing block
+ * operation in queue @q.
+ */
+DEFINE_EVENT(block_bio, block_bio_frontmerge,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio),
+
+       TP_ARGS(q, bio)
+)
+
+/**
+ * block_bio_queue - putting new block IO operation in queue
+ * @q: queue holding operation
+ * @bio: new block operation
+ *
+ * About to place the block IO operation @bio into queue @q.
+ */
+DEFINE_EVENT(block_bio, block_bio_queue,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio),
+
+       TP_ARGS(q, bio)
+)
+
+DECLARE_EVENT_CLASS(block_get_rq,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
+
+       TP_ARGS(q, bio, rw),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev                     )
+               __field( sector_t,      sector                  )
+               __field( unsigned int,  nr_sector               )
+               __field( unsigned int,  rwbs                    )
+               __array( char,          comm,   TASK_COMM_LEN   )
+        ),
+
+       TP_fast_assign(
+               tp_assign(dev, bio ? bio->bi_bdev->bd_dev : 0)
+               tp_assign(sector, bio ? bio->bi_sector : 0)
+               tp_assign(nr_sector, bio ? bio->bi_size >> 9 : 0)
+               blk_fill_rwbs(rwbs, bio ? bio->bi_rw : 0,
+                             bio ? bio->bi_size >> 9 : 0)
+               tp_memcpy(comm, current->comm, TASK_COMM_LEN)
+        ),
+
+       TP_printk("%d,%d %s %llu + %u [%s]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __print_rwbs_flags(__entry->rwbs),
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->comm)
+)
+
+/**
+ * block_getrq - get a free request entry in queue for block IO operations
+ * @q: queue for operations
+ * @bio: pending block IO operation
+ * @rw: low bit indicates a read (%0) or a write (%1)
+ *
+ * A request struct for queue @q has been allocated to handle the
+ * block IO operation @bio.
+ */
+DEFINE_EVENT(block_get_rq, block_getrq,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
+
+       TP_ARGS(q, bio, rw)
+)
+
+/**
+ * block_sleeprq - waiting to get a free request entry in queue for block IO operation
+ * @q: queue for operation
+ * @bio: pending block IO operation
+ * @rw: low bit indicates a read (%0) or a write (%1)
+ *
+ * In the case where a request struct cannot be provided for queue @q
+ * the process needs to wait for a request struct to become
+ * available.  This tracepoint event is generated each time the
+ * process goes to sleep waiting for a request struct to become available.
+ */
+DEFINE_EVENT(block_get_rq, block_sleeprq,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
+
+       TP_ARGS(q, bio, rw)
+)
+
+/**
+ * block_plug - keep operations requests in request queue
+ * @q: request queue to plug
+ *
+ * Plug the request queue @q.  Do not allow block operation requests
+ * to be sent to the device driver. Instead, accumulate requests in
+ * the queue to improve throughput performance of the block device.
+ */
+TRACE_EVENT(block_plug,
+
+       TP_PROTO(struct request_queue *q),
+
+       TP_ARGS(q),
+
+       TP_STRUCT__entry(
+               __array( char,          comm,   TASK_COMM_LEN   )
+       ),
+
+       TP_fast_assign(
+               tp_memcpy(comm, current->comm, TASK_COMM_LEN)
+       ),
+
+       TP_printk("[%s]", __entry->comm)
+)
+
+DECLARE_EVENT_CLASS(block_unplug,
+
+       TP_PROTO(struct request_queue *q, unsigned int depth, bool explicit),
+
+       TP_ARGS(q, depth, explicit),
+
+       TP_STRUCT__entry(
+               __field( int,           nr_rq                   )
+               __array( char,          comm,   TASK_COMM_LEN   )
+       ),
+
+       TP_fast_assign(
+               tp_assign(nr_rq, depth)
+               tp_memcpy(comm, current->comm, TASK_COMM_LEN)
+       ),
+
+       TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
+)
+
+/**
+ * block_unplug - release of operations requests in request queue
+ * @q: request queue to unplug
+ * @depth: number of requests just added to the queue
+ * @explicit: whether this was an explicit unplug, or one from schedule()
+ *
+ * Unplug request queue @q because device driver is scheduled to work
+ * on elements in the request queue.
+ */
+DEFINE_EVENT(block_unplug, block_unplug,
+
+       TP_PROTO(struct request_queue *q, unsigned int depth, bool explicit),
+
+       TP_ARGS(q, depth, explicit)
+)
+
+/**
+ * block_split - split a single bio struct into two bio structs
+ * @q: queue containing the bio
+ * @bio: block operation being split
+ * @new_sector: The starting sector for the new bio
+ *
+ * The bio request @bio in request queue @q needs to be split into two
+ * bio requests. The newly created @bio request starts at
+ * @new_sector. This split may be required due to hardware limitation
+ * such as operation crossing device boundaries in a RAID system.
+ */
+TRACE_EVENT(block_split,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio,
+                unsigned int new_sector),
+
+       TP_ARGS(q, bio, new_sector),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev                             )
+               __field( sector_t,      sector                          )
+               __field( sector_t,      new_sector                      )
+               __field( unsigned int,  rwbs            )
+               __array( char,          comm,           TASK_COMM_LEN   )
+       ),
+
+       TP_fast_assign(
+               tp_assign(dev, bio->bi_bdev->bd_dev)
+               tp_assign(sector, bio->bi_sector)
+               tp_assign(new_sector, new_sector)
+               blk_fill_rwbs(rwbs, bio->bi_rw, bio->bi_size)
+               tp_memcpy(comm, current->comm, TASK_COMM_LEN)
+       ),
+
+       TP_printk("%d,%d %s %llu / %llu [%s]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __print_rwbs_flags(__entry->rwbs),
+                 (unsigned long long)__entry->sector,
+                 (unsigned long long)__entry->new_sector,
+                 __entry->comm)
+)
+
+/**
+ * block_bio_remap - map request for a logical device to the raw device
+ * @q: queue holding the operation
+ * @bio: revised operation
+ * @dev: device for the operation
+ * @from: original sector for the operation
+ *
+ * An operation for a logical device has been mapped to the
+ * raw block device.
+ */
+TRACE_EVENT(block_bio_remap,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
+                sector_t from),
+
+       TP_ARGS(q, bio, dev, from),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev             )
+               __field( sector_t,      sector          )
+               __field( unsigned int,  nr_sector       )
+               __field( dev_t,         old_dev         )
+               __field( sector_t,      old_sector      )
+               __field( unsigned int,  rwbs            )
+       ),
+
+       TP_fast_assign(
+               tp_assign(dev, bio->bi_bdev->bd_dev)
+               tp_assign(sector, bio->bi_sector)
+               tp_assign(nr_sector, bio->bi_size >> 9)
+               tp_assign(old_dev, dev)
+               tp_assign(old_sector, from)
+               blk_fill_rwbs(rwbs, bio->bi_rw, bio->bi_size)
+       ),
+
+       TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __print_rwbs_flags(__entry->rwbs),
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector,
+                 MAJOR(__entry->old_dev), MINOR(__entry->old_dev),
+                 (unsigned long long)__entry->old_sector)
+)
+
+/**
+ * block_rq_remap - map request for a block operation request
+ * @q: queue holding the operation
+ * @rq: block IO operation request
+ * @dev: device for the operation
+ * @from: original sector for the operation
+ *
+ * The block operation request @rq in @q has been remapped.  The block
+ * operation request @rq holds the current information and @from holds
+ * the original sector.
+ */
+TRACE_EVENT(block_rq_remap,
+
+       TP_PROTO(struct request_queue *q, struct request *rq, dev_t dev,
+                sector_t from),
+
+       TP_ARGS(q, rq, dev, from),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev             )
+               __field( sector_t,      sector          )
+               __field( unsigned int,  nr_sector       )
+               __field( dev_t,         old_dev         )
+               __field( sector_t,      old_sector      )
+               __field( unsigned int,  rwbs            )
+       ),
+
+       TP_fast_assign(
+               tp_assign(dev, disk_devt(rq->rq_disk))
+               tp_assign(sector, blk_rq_pos(rq))
+               tp_assign(nr_sector, blk_rq_sectors(rq))
+               tp_assign(old_dev, dev)
+               tp_assign(old_sector, from)
+               blk_fill_rwbs(rwbs, rq->cmd_flags, blk_rq_bytes(rq))
+       ),
+
+       TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __print_rwbs_flags(__entry->rwbs),
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector,
+                 MAJOR(__entry->old_dev), MINOR(__entry->old_dev),
+                 (unsigned long long)__entry->old_sector)
+)
+
+#undef __print_rwbs_flags
+#undef blk_fill_rwbs
+
+#endif /* _TRACE_BLOCK_H */
+
+/* This part must be outside protection */
+#include "define_trace.h"
+
diff --git a/instrumentation/events/lttng-module/irq.h b/instrumentation/events/lttng-module/irq.h
new file mode 100644 (file)
index 0000000..ac590c9
--- /dev/null
@@ -0,0 +1,155 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM irq
+
+#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_IRQ_H
+
+#include <linux/tracepoint.h>
+
+#ifndef _TRACE_IRQ_DEF_
+#define _TRACE_IRQ_DEF_
+
+struct irqaction;
+struct softirq_action;
+
+#define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq }
+#define show_softirq_name(val)                         \
+       __print_symbolic(val,                           \
+                        softirq_name(HI),              \
+                        softirq_name(TIMER),           \
+                        softirq_name(NET_TX),          \
+                        softirq_name(NET_RX),          \
+                        softirq_name(BLOCK),           \
+                        softirq_name(BLOCK_IOPOLL),    \
+                        softirq_name(TASKLET),         \
+                        softirq_name(SCHED),           \
+                        softirq_name(HRTIMER),         \
+                        softirq_name(RCU))
+
+#endif /* _TRACE_IRQ_DEF_ */
+
+/**
+ * irq_handler_entry - called immediately before the irq action handler
+ * @irq: irq number
+ * @action: pointer to struct irqaction
+ *
+ * The struct irqaction pointed to by @action contains various
+ * information about the handler, including the device name,
+ * @action->name, and the device id, @action->dev_id. When used in
+ * conjunction with the irq_handler_exit tracepoint, we can figure
+ * out irq handler latencies.
+ */
+TRACE_EVENT(irq_handler_entry,
+
+       TP_PROTO(int irq, struct irqaction *action),
+
+       TP_ARGS(irq, action),
+
+       TP_STRUCT__entry(
+               __field(        int,    irq             )
+               __string(       name,   action->name    )
+       ),
+
+       TP_fast_assign(
+               tp_assign(irq, irq)
+               tp_strcpy(name, action->name)
+       ),
+
+       TP_printk("irq=%d name=%s", __entry->irq, __get_str(name))
+)
+
+/**
+ * irq_handler_exit - called immediately after the irq action handler returns
+ * @irq: irq number
+ * @action: pointer to struct irqaction
+ * @ret: return value
+ *
+ * If the @ret value is set to IRQ_HANDLED, then we know that the corresponding
+ * @action->handler successfully handled this irq. Otherwise, the irq might be
+ * a shared irq line, or the irq was not handled successfully. Can be used in
+ * conjunction with the irq_handler_entry to understand irq handler latencies.
+ */
+TRACE_EVENT(irq_handler_exit,
+
+       TP_PROTO(int irq, struct irqaction *action, int ret),
+
+       TP_ARGS(irq, action, ret),
+
+       TP_STRUCT__entry(
+               __field(        int,    irq     )
+               __field(        int,    ret     )
+       ),
+
+       TP_fast_assign(
+               tp_assign(irq, irq)
+               tp_assign(ret, ret)
+       ),
+
+       TP_printk("irq=%d ret=%s",
+                 __entry->irq, __entry->ret ? "handled" : "unhandled")
+)
+
+DECLARE_EVENT_CLASS(softirq,
+
+       TP_PROTO(unsigned int vec_nr),
+
+       TP_ARGS(vec_nr),
+
+       TP_STRUCT__entry(
+               __field(        unsigned int,   vec     )
+       ),
+
+       TP_fast_assign(
+               tp_assign(vec, vec_nr)
+       ),
+
+       TP_printk("vec=%u [action=%s]", __entry->vec,
+                 show_softirq_name(__entry->vec))
+)
+
+/**
+ * softirq_entry - called immediately before the softirq handler
+ * @vec_nr:  softirq vector number
+ *
+ * When used in combination with the softirq_exit tracepoint
+ * we can determine the softirq handler runtime.
+ */
+DEFINE_EVENT(softirq, softirq_entry,
+
+       TP_PROTO(unsigned int vec_nr),
+
+       TP_ARGS(vec_nr)
+)
+
+/**
+ * softirq_exit - called immediately after the softirq handler returns
+ * @vec_nr:  softirq vector number
+ *
+ * When used in combination with the softirq_entry tracepoint
+ * we can determine the softirq handler runtime.
+ */
+DEFINE_EVENT(softirq, softirq_exit,
+
+       TP_PROTO(unsigned int vec_nr),
+
+       TP_ARGS(vec_nr)
+)
+
+/**
+ * softirq_raise - called immediately when a softirq is raised
+ * @vec_nr:  softirq vector number
+ *
+ * When used in combination with the softirq_entry tracepoint
+ * we can determine the softirq raise to run latency.
+ */
+DEFINE_EVENT(softirq, softirq_raise,
+
+       TP_PROTO(unsigned int vec_nr),
+
+       TP_ARGS(vec_nr)
+)
+
+#endif /*  _TRACE_IRQ_H */
+
+/* This part must be outside protection */
+#include "define_trace.h"
diff --git a/instrumentation/events/lttng-module/kvm.h b/instrumentation/events/lttng-module/kvm.h
new file mode 100644 (file)
index 0000000..43914b3
--- /dev/null
@@ -0,0 +1,312 @@
+#if !defined(_TRACE_KVM_MAIN_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_MAIN_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+
+#define ERSN(x) { KVM_EXIT_##x, "KVM_EXIT_" #x }
+
+#define kvm_trace_exit_reason                                          \
+       ERSN(UNKNOWN), ERSN(EXCEPTION), ERSN(IO), ERSN(HYPERCALL),      \
+       ERSN(DEBUG), ERSN(HLT), ERSN(MMIO), ERSN(IRQ_WINDOW_OPEN),      \
+       ERSN(SHUTDOWN), ERSN(FAIL_ENTRY), ERSN(INTR), ERSN(SET_TPR),    \
+       ERSN(TPR_ACCESS), ERSN(S390_SIEIC), ERSN(S390_RESET), ERSN(DCR),\
+       ERSN(NMI), ERSN(INTERNAL_ERROR), ERSN(OSI)
+
+TRACE_EVENT(kvm_userspace_exit,
+           TP_PROTO(__u32 reason, int errno),
+           TP_ARGS(reason, errno),
+
+       TP_STRUCT__entry(
+               __field(        __u32,          reason          )
+               __field(        int,            errno           )
+       ),
+
+       TP_fast_assign(
+               tp_assign(reason, reason)
+               tp_assign(errno, errno)
+       ),
+
+       TP_printk("reason %s (%d)",
+                 __entry->errno < 0 ?
+                 (__entry->errno == -EINTR ? "restart" : "error") :
+                 __print_symbolic(__entry->reason, kvm_trace_exit_reason),
+                 __entry->errno < 0 ? -__entry->errno : __entry->reason)
+)
+
+#if defined(__KVM_HAVE_IOAPIC)
+TRACE_EVENT(kvm_set_irq,
+       TP_PROTO(unsigned int gsi, int level, int irq_source_id),
+       TP_ARGS(gsi, level, irq_source_id),
+
+       TP_STRUCT__entry(
+               __field(        unsigned int,   gsi             )
+               __field(        int,            level           )
+               __field(        int,            irq_source_id   )
+       ),
+
+       TP_fast_assign(
+               tp_assign(gsi, gsi)
+               tp_assign(level, level)
+               tp_assign(irq_source_id, irq_source_id)
+       ),
+
+       TP_printk("gsi %u level %d source %d",
+                 __entry->gsi, __entry->level, __entry->irq_source_id)
+)
+
+#define kvm_deliver_mode               \
+       {0x0, "Fixed"},                 \
+       {0x1, "LowPrio"},               \
+       {0x2, "SMI"},                   \
+       {0x3, "Res3"},                  \
+       {0x4, "NMI"},                   \
+       {0x5, "INIT"},                  \
+       {0x6, "SIPI"},                  \
+       {0x7, "ExtINT"}
+
+TRACE_EVENT(kvm_ioapic_set_irq,
+           TP_PROTO(__u64 e, int pin, bool coalesced),
+           TP_ARGS(e, pin, coalesced),
+
+       TP_STRUCT__entry(
+               __field(        __u64,          e               )
+               __field(        int,            pin             )
+               __field(        bool,           coalesced       )
+       ),
+
+       TP_fast_assign(
+               tp_assign(e, e)
+               tp_assign(pin, pin)
+               tp_assign(coalesced, coalesced)
+       ),
+
+       TP_printk("pin %u dst %x vec=%u (%s|%s|%s%s)%s",
+                 __entry->pin, (u8)(__entry->e >> 56), (u8)__entry->e,
+                 __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode),
+                 (__entry->e & (1<<11)) ? "logical" : "physical",
+                 (__entry->e & (1<<15)) ? "level" : "edge",
+                 (__entry->e & (1<<16)) ? "|masked" : "",
+                 __entry->coalesced ? " (coalesced)" : "")
+)
+
+TRACE_EVENT(kvm_msi_set_irq,
+           TP_PROTO(__u64 address, __u64 data),
+           TP_ARGS(address, data),
+
+       TP_STRUCT__entry(
+               __field(        __u64,          address         )
+               __field(        __u64,          data            )
+       ),
+
+       TP_fast_assign(
+               tp_assign(address, address)
+               tp_assign(data, data)
+       ),
+
+       TP_printk("dst %u vec %x (%s|%s|%s%s)",
+                 (u8)(__entry->address >> 12), (u8)__entry->data,
+                 __print_symbolic((__entry->data >> 8 & 0x7), kvm_deliver_mode),
+                 (__entry->address & (1<<2)) ? "logical" : "physical",
+                 (__entry->data & (1<<15)) ? "level" : "edge",
+                 (__entry->address & (1<<3)) ? "|rh" : "")
+)
+
+#define kvm_irqchips                                           \
+       {KVM_IRQCHIP_PIC_MASTER,        "PIC master"},          \
+       {KVM_IRQCHIP_PIC_SLAVE,         "PIC slave"},           \
+       {KVM_IRQCHIP_IOAPIC,            "IOAPIC"}
+
+TRACE_EVENT(kvm_ack_irq,
+       TP_PROTO(unsigned int irqchip, unsigned int pin),
+       TP_ARGS(irqchip, pin),
+
+       TP_STRUCT__entry(
+               __field(        unsigned int,   irqchip         )
+               __field(        unsigned int,   pin             )
+       ),
+
+       TP_fast_assign(
+               tp_assign(irqchip, irqchip)
+               tp_assign(pin, pin)
+       ),
+
+       TP_printk("irqchip %s pin %u",
+                 __print_symbolic(__entry->irqchip, kvm_irqchips),
+                __entry->pin)
+)
+
+
+
+#endif /* defined(__KVM_HAVE_IOAPIC) */
+
+#define KVM_TRACE_MMIO_READ_UNSATISFIED 0
+#define KVM_TRACE_MMIO_READ 1
+#define KVM_TRACE_MMIO_WRITE 2
+
+#define kvm_trace_symbol_mmio \
+       { KVM_TRACE_MMIO_READ_UNSATISFIED, "unsatisfied-read" }, \
+       { KVM_TRACE_MMIO_READ, "read" }, \
+       { KVM_TRACE_MMIO_WRITE, "write" }
+
+TRACE_EVENT(kvm_mmio,
+       TP_PROTO(int type, int len, u64 gpa, u64 val),
+       TP_ARGS(type, len, gpa, val),
+
+       TP_STRUCT__entry(
+               __field(        u32,    type            )
+               __field(        u32,    len             )
+               __field(        u64,    gpa             )
+               __field(        u64,    val             )
+       ),
+
+       TP_fast_assign(
+               tp_assign(type, type)
+               tp_assign(len, len)
+               tp_assign(gpa, gpa)
+               tp_assign(val, val)
+       ),
+
+       TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx",
+                 __print_symbolic(__entry->type, kvm_trace_symbol_mmio),
+                 __entry->len, __entry->gpa, __entry->val)
+)
+
+#define kvm_fpu_load_symbol    \
+       {0, "unload"},          \
+       {1, "load"}
+
+TRACE_EVENT(kvm_fpu,
+       TP_PROTO(int load),
+       TP_ARGS(load),
+
+       TP_STRUCT__entry(
+               __field(        u32,            load            )
+       ),
+
+       TP_fast_assign(
+               tp_assign(load, load)
+       ),
+
+       TP_printk("%s", __print_symbolic(__entry->load, kvm_fpu_load_symbol))
+)
+
+TRACE_EVENT(kvm_age_page,
+       TP_PROTO(ulong hva, struct kvm_memory_slot *slot, int ref),
+       TP_ARGS(hva, slot, ref),
+
+       TP_STRUCT__entry(
+               __field(        u64,    hva             )
+               __field(        u64,    gfn             )
+               __field(        u8,     referenced      )
+       ),
+
+       TP_fast_assign(
+               tp_assign(hva, hva)
+               tp_assign(gfn,
+                 slot->base_gfn + ((hva - slot->userspace_addr) >> PAGE_SHIFT))
+               tp_assign(referenced, ref)
+       ),
+
+       TP_printk("hva %llx gfn %llx %s",
+                 __entry->hva, __entry->gfn,
+                 __entry->referenced ? "YOUNG" : "OLD")
+)
+
+#ifdef CONFIG_KVM_ASYNC_PF
+DECLARE_EVENT_CLASS(kvm_async_get_page_class,
+
+       TP_PROTO(u64 gva, u64 gfn),
+
+       TP_ARGS(gva, gfn),
+
+       TP_STRUCT__entry(
+               __field(__u64, gva)
+               __field(u64, gfn)
+       ),
+
+       TP_fast_assign(
+               tp_assign(gva, gva)
+               tp_assign(gfn, gfn)
+       ),
+
+       TP_printk("gva = %#llx, gfn = %#llx", __entry->gva, __entry->gfn)
+)
+
+DEFINE_EVENT(kvm_async_get_page_class, kvm_try_async_get_page,
+
+       TP_PROTO(u64 gva, u64 gfn),
+
+       TP_ARGS(gva, gfn)
+)
+
+DEFINE_EVENT(kvm_async_get_page_class, kvm_async_pf_doublefault,
+
+       TP_PROTO(u64 gva, u64 gfn),
+
+       TP_ARGS(gva, gfn)
+)
+
+DECLARE_EVENT_CLASS(kvm_async_pf_nopresent_ready,
+
+       TP_PROTO(u64 token, u64 gva),
+
+       TP_ARGS(token, gva),
+
+       TP_STRUCT__entry(
+               __field(__u64, token)
+               __field(__u64, gva)
+       ),
+
+       TP_fast_assign(
+               tp_assign(token, token)
+               tp_assign(gva, gva)
+       ),
+
+       TP_printk("token %#llx gva %#llx", __entry->token, __entry->gva)
+
+)
+
+DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_not_present,
+
+       TP_PROTO(u64 token, u64 gva),
+
+       TP_ARGS(token, gva)
+)
+
+DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_ready,
+
+       TP_PROTO(u64 token, u64 gva),
+
+       TP_ARGS(token, gva)
+)
+
+TRACE_EVENT(
+       kvm_async_pf_completed,
+       TP_PROTO(unsigned long address, struct page *page, u64 gva),
+       TP_ARGS(address, page, gva),
+
+       TP_STRUCT__entry(
+               __field(unsigned long, address)
+               __field(pfn_t, pfn)
+               __field(u64, gva)
+               ),
+
+       TP_fast_assign(
+               tp_assign(address, address)
+               tp_assign(pfn, page ? page_to_pfn(page) : 0)
+               tp_assign(gva, gva)
+               ),
+
+       TP_printk("gva %#llx address %#lx pfn %#llx",  __entry->gva,
+                 __entry->address, __entry->pfn)
+)
+
+#endif
+
+#endif /* _TRACE_KVM_MAIN_H */
+
+/* This part must be outside protection */
+#include "define_trace.h"
diff --git a/instrumentation/events/lttng-module/sched.h b/instrumentation/events/lttng-module/sched.h
new file mode 100644 (file)
index 0000000..4d61844
--- /dev/null
@@ -0,0 +1,400 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM sched
+
+#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SCHED_H
+
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+#ifndef _TRACE_SCHED_DEF_
+#define _TRACE_SCHED_DEF_
+
+static inline long __trace_sched_switch_state(struct task_struct *p)
+{
+       long state = p->state;
+
+#ifdef CONFIG_PREEMPT
+       /*
+        * For all intents and purposes a preempted task is a running task.
+        */
+       if (task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)
+               state = TASK_RUNNING;
+#endif
+
+       return state;
+}
+
+#endif /* _TRACE_SCHED_DEF_ */
+
+/*
+ * Tracepoint for calling kthread_stop, performed to end a kthread:
+ */
+TRACE_EVENT(sched_kthread_stop,
+
+       TP_PROTO(struct task_struct *t),
+
+       TP_ARGS(t),
+
+       TP_STRUCT__entry(
+               __array(        char,   comm,   TASK_COMM_LEN   )
+               __field(        pid_t,  pid                     )
+       ),
+
+       TP_fast_assign(
+               tp_memcpy(comm, t->comm, TASK_COMM_LEN)
+               tp_assign(pid, t->pid)
+       ),
+
+       TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
+)
+
+/*
+ * Tracepoint for the return value of the kthread stopping:
+ */
+TRACE_EVENT(sched_kthread_stop_ret,
+
+       TP_PROTO(int ret),
+
+       TP_ARGS(ret),
+
+       TP_STRUCT__entry(
+               __field(        int,    ret     )
+       ),
+
+       TP_fast_assign(
+               tp_assign(ret, ret)
+       ),
+
+       TP_printk("ret=%d", __entry->ret)
+)
+
+/*
+ * Tracepoint for waking up a task:
+ */
+DECLARE_EVENT_CLASS(sched_wakeup_template,
+
+       TP_PROTO(struct task_struct *p, int success),
+
+       TP_ARGS(p, success),
+
+       TP_STRUCT__entry(
+               __array(        char,   comm,   TASK_COMM_LEN   )
+               __field(        pid_t,  pid                     )
+               __field(        int,    prio                    )
+               __field(        int,    success                 )
+               __field(        int,    target_cpu              )
+       ),
+
+       TP_fast_assign(
+               tp_memcpy(comm, p->comm, TASK_COMM_LEN)
+               tp_assign(pid, p->pid)
+               tp_assign(prio, p->prio)
+               tp_assign(success, success)
+               tp_assign(target_cpu, task_cpu(p))
+       ),
+
+       TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d",
+                 __entry->comm, __entry->pid, __entry->prio,
+                 __entry->success, __entry->target_cpu)
+)
+
+DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
+            TP_PROTO(struct task_struct *p, int success),
+            TP_ARGS(p, success))
+
+/*
+ * Tracepoint for waking up a new task:
+ */
+DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
+            TP_PROTO(struct task_struct *p, int success),
+            TP_ARGS(p, success))
+
+/*
+ * Tracepoint for task switches, performed by the scheduler:
+ */
+TRACE_EVENT(sched_switch,
+
+       TP_PROTO(struct task_struct *prev,
+                struct task_struct *next),
+
+       TP_ARGS(prev, next),
+
+       TP_STRUCT__entry(
+               __array(        char,   prev_comm,      TASK_COMM_LEN   )
+               __field(        pid_t,  prev_pid                        )
+               __field(        int,    prev_prio                       )
+               __field(        long,   prev_state                      )
+               __array(        char,   next_comm,      TASK_COMM_LEN   )
+               __field(        pid_t,  next_pid                        )
+               __field(        int,    next_prio                       )
+       ),
+
+       TP_fast_assign(
+               tp_memcpy(next_comm, next->comm, TASK_COMM_LEN)
+               tp_assign(prev_pid, prev->pid)
+               tp_assign(prev_prio, prev->prio)
+               tp_assign(prev_state, __trace_sched_switch_state(prev))
+               tp_memcpy(prev_comm, prev->comm, TASK_COMM_LEN)
+               tp_assign(next_pid, next->pid)
+               tp_assign(next_prio, next->prio)
+       ),
+
+       TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s ==> next_comm=%s next_pid=%d next_prio=%d",
+               __entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
+               __entry->prev_state ?
+                 __print_flags(__entry->prev_state, "|",
+                               { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
+                               { 16, "Z" }, { 32, "X" }, { 64, "x" },
+                               { 128, "W" }) : "R",
+               __entry->next_comm, __entry->next_pid, __entry->next_prio)
+)
+
+/*
+ * Tracepoint for a task being migrated:
+ */
+TRACE_EVENT(sched_migrate_task,
+
+       TP_PROTO(struct task_struct *p, int dest_cpu),
+
+       TP_ARGS(p, dest_cpu),
+
+       TP_STRUCT__entry(
+               __array(        char,   comm,   TASK_COMM_LEN   )
+               __field(        pid_t,  pid                     )
+               __field(        int,    prio                    )
+               __field(        int,    orig_cpu                )
+               __field(        int,    dest_cpu                )
+       ),
+
+       TP_fast_assign(
+               tp_memcpy(comm, p->comm, TASK_COMM_LEN)
+               tp_assign(pid, p->pid)
+               tp_assign(prio, p->prio)
+               tp_assign(orig_cpu, task_cpu(p))
+               tp_assign(dest_cpu, dest_cpu)
+       ),
+
+       TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d",
+                 __entry->comm, __entry->pid, __entry->prio,
+                 __entry->orig_cpu, __entry->dest_cpu)
+)
+
+DECLARE_EVENT_CLASS(sched_process_template,
+
+       TP_PROTO(struct task_struct *p),
+
+       TP_ARGS(p),
+
+       TP_STRUCT__entry(
+               __array(        char,   comm,   TASK_COMM_LEN   )
+               __field(        pid_t,  pid                     )
+               __field(        int,    prio                    )
+       ),
+
+       TP_fast_assign(
+               tp_memcpy(comm, p->comm, TASK_COMM_LEN)
+               tp_assign(pid, p->pid)
+               tp_assign(prio, p->prio)
+       ),
+
+       TP_printk("comm=%s pid=%d prio=%d",
+                 __entry->comm, __entry->pid, __entry->prio)
+)
+
+/*
+ * Tracepoint for freeing a task:
+ */
+DEFINE_EVENT(sched_process_template, sched_process_free,
+            TP_PROTO(struct task_struct *p),
+            TP_ARGS(p))
+            
+
+/*
+ * Tracepoint for a task exiting:
+ */
+DEFINE_EVENT(sched_process_template, sched_process_exit,
+            TP_PROTO(struct task_struct *p),
+            TP_ARGS(p))
+
+/*
+ * Tracepoint for waiting on task to unschedule:
+ */
+DEFINE_EVENT(sched_process_template, sched_wait_task,
+       TP_PROTO(struct task_struct *p),
+       TP_ARGS(p))
+
+/*
+ * Tracepoint for a waiting task:
+ */
+TRACE_EVENT(sched_process_wait,
+
+       TP_PROTO(struct pid *pid),
+
+       TP_ARGS(pid),
+
+       TP_STRUCT__entry(
+               __array(        char,   comm,   TASK_COMM_LEN   )
+               __field(        pid_t,  pid                     )
+               __field(        int,    prio                    )
+       ),
+
+       TP_fast_assign(
+               tp_memcpy(comm, current->comm, TASK_COMM_LEN)
+               tp_assign(pid, pid_nr(pid))
+               tp_assign(prio, current->prio)
+       ),
+
+       TP_printk("comm=%s pid=%d prio=%d",
+                 __entry->comm, __entry->pid, __entry->prio)
+)
+
+/*
+ * Tracepoint for do_fork:
+ */
+TRACE_EVENT(sched_process_fork,
+
+       TP_PROTO(struct task_struct *parent, struct task_struct *child),
+
+       TP_ARGS(parent, child),
+
+       TP_STRUCT__entry(
+               __array(        char,   parent_comm,    TASK_COMM_LEN   )
+               __field(        pid_t,  parent_pid                      )
+               __array(        char,   child_comm,     TASK_COMM_LEN   )
+               __field(        pid_t,  child_pid                       )
+       ),
+
+       TP_fast_assign(
+               tp_memcpy(parent_comm, parent->comm, TASK_COMM_LEN)
+               tp_assign(parent_pid, parent->pid)
+               tp_memcpy(child_comm, child->comm, TASK_COMM_LEN)
+               tp_assign(child_pid, child->pid)
+       ),
+
+       TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d",
+               __entry->parent_comm, __entry->parent_pid,
+               __entry->child_comm, __entry->child_pid)
+)
+
+/*
+ * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
+ *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
+ */
+DECLARE_EVENT_CLASS(sched_stat_template,
+
+       TP_PROTO(struct task_struct *tsk, u64 delay),
+
+       TP_ARGS(tsk, delay),
+
+       TP_STRUCT__entry(
+               __array( char,  comm,   TASK_COMM_LEN   )
+               __field( pid_t, pid                     )
+               __field( u64,   delay                   )
+       ),
+
+       TP_fast_assign(
+               tp_memcpy(comm, tsk->comm, TASK_COMM_LEN)
+               tp_assign(pid,  tsk->pid)
+               tp_assign(delay, delay)
+       )
+       TP_perf_assign(
+               __perf_count(delay)
+       ),
+
+       TP_printk("comm=%s pid=%d delay=%Lu [ns]",
+                       __entry->comm, __entry->pid,
+                       (unsigned long long)__entry->delay)
+)
+
+
+/*
+ * Tracepoint for accounting wait time (time the task is runnable
+ * but not actually running due to scheduler contention).
+ */
+DEFINE_EVENT(sched_stat_template, sched_stat_wait,
+            TP_PROTO(struct task_struct *tsk, u64 delay),
+            TP_ARGS(tsk, delay))
+
+/*
+ * Tracepoint for accounting sleep time (time the task is not runnable,
+ * including iowait, see below).
+ */
+DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
+            TP_PROTO(struct task_struct *tsk, u64 delay),
+            TP_ARGS(tsk, delay))
+
+/*
+ * Tracepoint for accounting iowait time (time the task is not runnable
+ * due to waiting on IO to complete).
+ */
+DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
+            TP_PROTO(struct task_struct *tsk, u64 delay),
+            TP_ARGS(tsk, delay))
+
+/*
+ * Tracepoint for accounting runtime (time the task is executing
+ * on a CPU).
+ */
+TRACE_EVENT(sched_stat_runtime,
+
+       TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
+
+       TP_ARGS(tsk, runtime, vruntime),
+
+       TP_STRUCT__entry(
+               __array( char,  comm,   TASK_COMM_LEN   )
+               __field( pid_t, pid                     )
+               __field( u64,   runtime                 )
+               __field( u64,   vruntime                        )
+       ),
+
+       TP_fast_assign(
+               tp_memcpy(comm, tsk->comm, TASK_COMM_LEN)
+               tp_assign(pid, tsk->pid)
+               tp_assign(runtime, runtime)
+               tp_assign(vruntime, vruntime)
+       )
+       TP_perf_assign(
+               __perf_count(runtime)
+       ),
+
+       TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]",
+                       __entry->comm, __entry->pid,
+                       (unsigned long long)__entry->runtime,
+                       (unsigned long long)__entry->vruntime)
+)
+
+/*
+ * Tracepoint for showing priority inheritance modifying a task's
+ * priority.
+ */
+TRACE_EVENT(sched_pi_setprio,
+
+       TP_PROTO(struct task_struct *tsk, int newprio),
+
+       TP_ARGS(tsk, newprio),
+
+       TP_STRUCT__entry(
+               __array( char,  comm,   TASK_COMM_LEN   )
+               __field( pid_t, pid                     )
+               __field( int,   oldprio                 )
+               __field( int,   newprio                 )
+       ),
+
+       TP_fast_assign(
+               tp_memcpy(comm, tsk->comm, TASK_COMM_LEN)
+               tp_assign(pid, tsk->pid)
+               tp_assign(oldprio, tsk->prio)
+               tp_assign(newprio, newprio)
+       ),
+
+       TP_printk("comm=%s pid=%d oldprio=%d newprio=%d",
+                       __entry->comm, __entry->pid,
+                       __entry->oldprio, __entry->newprio)
+)
+
+#endif /* _TRACE_SCHED_H */
+
+/* This part must be outside protection */
+#include "define_trace.h"
diff --git a/instrumentation/events/lttng-patch/block.h b/instrumentation/events/lttng-patch/block.h
new file mode 100644 (file)
index 0000000..12e35e0
--- /dev/null
@@ -0,0 +1,626 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM block
+
+#if !defined(_TRACE_BLOCK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_BLOCK_H
+
+#include <linux/blktrace_api.h>
+#include <linux/blkdev.h>
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#ifndef _TRACE_BLOCK_DEF_
+#define _TRACE_BLOCK_DEF_
+
+static inline int blk_cmd_buf_len(struct request *rq)
+{
+       return (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? rq->cmd_len : 0;
+}
+
+const char *blk_dump_cmd(struct trace_seq *p, unsigned char *cmd, int len);
+#define __blk_dump_cmd(cmd, len)       blk_dump_cmd(p, cmd, len)
+
+enum {
+       RWBS_FLAG_WRITE         = (1 << 0),
+       RWBS_FLAG_DISCARD       = (1 << 1),
+       RWBS_FLAG_READ          = (1 << 2),
+       RWBS_FLAG_RAHEAD        = (1 << 3),
+       RWBS_FLAG_SYNC          = (1 << 4),
+       RWBS_FLAG_META          = (1 << 5),
+       RWBS_FLAG_SECURE        = (1 << 6),
+};
+
+#endif /* _TRACE_BLOCK_DEF_ */
+
+#define __print_rwbs_flags(rwbs)               \
+       __print_flags(rwbs, "",                 \
+               { RWBS_FLAG_WRITE, "W" },       \
+               { RWBS_FLAG_DISCARD, "D" },     \
+               { RWBS_FLAG_READ, "R" },        \
+               { RWBS_FLAG_RAHEAD, "A" },      \
+               { RWBS_FLAG_SYNC, "S" },        \
+               { RWBS_FLAG_META, "M" },        \
+               { RWBS_FLAG_SECURE, "E" })
+
+#define blk_fill_rwbs(rwbs, rw, bytes)                                       \
+               tp_assign(rwbs, ((rw) & WRITE ? RWBS_FLAG_WRITE :             \
+                       ( (rw) & REQ_DISCARD ? RWBS_FLAG_DISCARD :            \
+                       ( (bytes) ? RWBS_FLAG_READ :                          \
+                       ( 0 ))))                                              \
+                       | ((rw) & REQ_RAHEAD ? RWBS_FLAG_RAHEAD : 0)          \
+                       | ((rw) & REQ_SYNC ? RWBS_FLAG_SYNC : 0)              \
+                       | ((rw) & REQ_META ? RWBS_FLAG_META : 0)              \
+                       | ((rw) & REQ_SECURE ? RWBS_FLAG_SECURE : 0))
+
+DECLARE_EVENT_CLASS(block_rq_with_error,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq),
+
+       TP_STRUCT__entry(
+               __field(  dev_t,        dev                     )
+               __field(  sector_t,     sector                  )
+               __field(  unsigned int, nr_sector               )
+               __field(  int,          errors                  )
+               __field(  unsigned int, rwbs                    )
+               __dynamic_array( char,  cmd,    blk_cmd_buf_len(rq)     )
+       ),
+
+       TP_fast_assign(
+               tp_assign(dev, rq->rq_disk ? disk_devt(rq->rq_disk) : 0)
+               tp_assign(sector, (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                                       0 : blk_rq_pos(rq))
+               tp_assign(nr_sector, (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                                       0 : blk_rq_sectors(rq))
+               tp_assign(errors, rq->errors)
+               blk_fill_rwbs(rwbs, rq->cmd_flags, blk_rq_bytes(rq))
+               tp_memcpy_dyn(cmd, rq->cmd, blk_cmd_buf_len(rq))
+       ),
+
+       TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __print_rwbs_flags(__entry->rwbs),
+                 __blk_dump_cmd(__get_dynamic_array(cmd),
+                                __get_dynamic_array_len(cmd)),
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->errors)
+)
+
+/**
+ * block_rq_abort - abort block operation request
+ * @q: queue containing the block operation request
+ * @rq: block IO operation request
+ *
+ * Called immediately after pending block IO operation request @rq in
+ * queue @q is aborted. The fields in the operation request @rq
+ * can be examined to determine which device and sectors the pending
+ * operation would access.
+ */
+DEFINE_EVENT(block_rq_with_error, block_rq_abort,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq)
+)
+
+/**
+ * block_rq_requeue - place block IO request back on a queue
+ * @q: queue holding operation
+ * @rq: block IO operation request
+ *
+ * The block operation request @rq is being placed back into queue
+ * @q.  For some reason the request was not completed and needs to be
+ * put back in the queue.
+ */
+DEFINE_EVENT(block_rq_with_error, block_rq_requeue,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq)
+)
+
+/**
+ * block_rq_complete - block IO operation completed by device driver
+ * @q: queue containing the block operation request
+ * @rq: block operations request
+ *
+ * The block_rq_complete tracepoint event indicates that some portion
+ * of the operation request has been completed by the device driver.  If
+ * the @rq->bio is %NULL, then there is absolutely no additional work to
+ * do for the request. If @rq->bio is non-NULL then there is
+ * additional work required to complete the request.
+ */
+DEFINE_EVENT(block_rq_with_error, block_rq_complete,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq)
+)
+
+DECLARE_EVENT_CLASS(block_rq,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq),
+
+       TP_STRUCT__entry(
+               __field(  dev_t,        dev                     )
+               __field(  sector_t,     sector                  )
+               __field(  unsigned int, nr_sector               )
+               __field(  unsigned int, bytes                   )
+               __field(  unsigned int, rwbs                    )
+               __array(  char,         comm,   TASK_COMM_LEN   )
+               __dynamic_array( char,  cmd,    blk_cmd_buf_len(rq)     )
+       ),
+
+       TP_fast_assign(
+               tp_assign(dev, rq->rq_disk ? disk_devt(rq->rq_disk) : 0)
+               tp_assign(sector, (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                                       0 : blk_rq_pos(rq))
+               tp_assign(nr_sector, (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                                       0 : blk_rq_sectors(rq))
+               tp_assign(bytes, (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                                       blk_rq_bytes(rq) : 0)
+               blk_fill_rwbs(rwbs, rq->cmd_flags, blk_rq_bytes(rq))
+               tp_memcpy_dyn(cmd, rq->cmd, blk_cmd_buf_len(rq))
+               tp_memcpy(comm, current->comm, TASK_COMM_LEN)
+       ),
+
+       TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __print_rwbs_flags(__entry->rwbs),
+                 __entry->bytes,
+                 __blk_dump_cmd(__get_dynamic_array(cmd),
+                                __get_dynamic_array_len(cmd)),
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->comm)
+)
+
+/**
+ * block_rq_insert - insert block operation request into queue
+ * @q: target queue
+ * @rq: block IO operation request
+ *
+ * Called immediately before block operation request @rq is inserted
+ * into queue @q.  The fields in the operation request @rq struct can
+ * be examined to determine which device and sectors the pending
+ * operation would access.
+ */
+DEFINE_EVENT(block_rq, block_rq_insert,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq)
+)
+
+/**
+ * block_rq_issue - issue pending block IO request operation to device driver
+ * @q: queue holding operation
+ * @rq: block IO operation operation request
+ *
+ * Called when block operation request @rq from queue @q is sent to a
+ * device driver for processing.
+ */
+DEFINE_EVENT(block_rq, block_rq_issue,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq)
+)
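+
+/*
+ * Illustrative sketch (not compiled, guarded by #if 0): how a probe module
+ * could hook the block_rq_issue tracepoint defined above.  The
+ * register_trace_block_rq_issue()/unregister_trace_block_rq_issue() helpers
+ * are generated by the tracepoint machinery from TP_PROTO; the probe name
+ * and the use of trace_printk() here are examples only.
+ */
+#if 0
+static void example_probe_rq_issue(void *data, struct request_queue *q,
+                                   struct request *rq)
+{
+        /* Runs in the tracepoint's context; must not sleep. */
+        trace_printk("issue dev=%u sectors=%u\n",
+                     rq->rq_disk ? disk_devt(rq->rq_disk) : 0,
+                     blk_rq_sectors(rq));
+}
+
+static int __init example_init(void)
+{
+        /* The second argument is private data passed back to the probe. */
+        return register_trace_block_rq_issue(example_probe_rq_issue, NULL);
+}
+
+static void __exit example_exit(void)
+{
+        unregister_trace_block_rq_issue(example_probe_rq_issue, NULL);
+        tracepoint_synchronize_unregister();
+}
+#endif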
+
+/**
+ * block_bio_bounce - used bounce buffer when processing block operation
+ * @q: queue holding the block operation
+ * @bio: block operation
+ *
+ * A bounce buffer was used to handle the block operation @bio in @q.
+ * This occurs when hardware limitations prevent a direct transfer of
+ * data between the @bio data memory area and the IO device.  Use of a
+ * bounce buffer requires extra copying of data and decreases
+ * performance.
+ */
+TRACE_EVENT(block_bio_bounce,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio),
+
+       TP_ARGS(q, bio),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev                     )
+               __field( sector_t,      sector                  )
+               __field( unsigned int,  nr_sector               )
+               __field( unsigned int,  rwbs                    )
+               __array( char,          comm,   TASK_COMM_LEN   )
+       ),
+
+       TP_fast_assign(
+               tp_assign(dev, bio->bi_bdev ?
+                                         bio->bi_bdev->bd_dev : 0)
+               tp_assign(sector, bio->bi_sector)
+               tp_assign(nr_sector, bio->bi_size >> 9)
+               blk_fill_rwbs(rwbs, bio->bi_rw, bio->bi_size)
+               tp_memcpy(comm, current->comm, TASK_COMM_LEN)
+       ),
+
+       TP_printk("%d,%d %s %llu + %u [%s]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __print_rwbs_flags(__entry->rwbs),
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->comm)
+)
+
+/**
+ * block_bio_complete - completed all work on the block operation
+ * @q: queue holding the block operation
+ * @bio: block operation completed
+ * @error: io error value
+ *
+ * This tracepoint indicates there is no further work to do on this
+ * block IO operation @bio.
+ */
+TRACE_EVENT(block_bio_complete,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio, int error),
+
+       TP_ARGS(q, bio, error),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev             )
+               __field( sector_t,      sector          )
+               __field( unsigned,      nr_sector       )
+               __field( int,           error           )
+               __field( unsigned int,  rwbs            )
+       ),
+
+       TP_fast_assign(
+               tp_assign(dev, bio->bi_bdev->bd_dev)
+               tp_assign(sector, bio->bi_sector)
+               tp_assign(nr_sector, bio->bi_size >> 9)
+               tp_assign(error, error)
+               blk_fill_rwbs(rwbs, bio->bi_rw, bio->bi_size)
+       ),
+
+       TP_printk("%d,%d %s %llu + %u [%d]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __print_rwbs_flags(__entry->rwbs),
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->error)
+)
+
+DECLARE_EVENT_CLASS(block_bio,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio),
+
+       TP_ARGS(q, bio),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev                     )
+               __field( sector_t,      sector                  )
+               __field( unsigned int,  nr_sector               )
+               __field( unsigned int,  rwbs                    )
+               __array( char,          comm,   TASK_COMM_LEN   )
+       ),
+
+       TP_fast_assign(
+               tp_assign(dev, bio->bi_bdev->bd_dev)
+               tp_assign(sector, bio->bi_sector)
+               tp_assign(nr_sector, bio->bi_size >> 9)
+               blk_fill_rwbs(rwbs, bio->bi_rw, bio->bi_size)
+               tp_memcpy(comm, current->comm, TASK_COMM_LEN)
+       ),
+
+       TP_printk("%d,%d %s %llu + %u [%s]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __print_rwbs_flags(__entry->rwbs),
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->comm)
+)
+
+/**
+ * block_bio_backmerge - merging block operation to the end of an existing operation
+ * @q: queue holding operation
+ * @bio: new block operation to merge
+ *
+ * Merging block request @bio to the end of an existing block request
+ * in queue @q.
+ */
+DEFINE_EVENT(block_bio, block_bio_backmerge,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio),
+
+       TP_ARGS(q, bio)
+)
+
+/**
+ * block_bio_frontmerge - merging block operation to the beginning of an existing operation
+ * @q: queue holding operation
+ * @bio: new block operation to merge
+ *
+ * Merging block IO operation @bio to the beginning of an existing block
+ * operation in queue @q.
+ */
+DEFINE_EVENT(block_bio, block_bio_frontmerge,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio),
+
+       TP_ARGS(q, bio)
+)
+
+/**
+ * block_bio_queue - putting new block IO operation in queue
+ * @q: queue holding operation
+ * @bio: new block operation
+ *
+ * About to place the block IO operation @bio into queue @q.
+ */
+DEFINE_EVENT(block_bio, block_bio_queue,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio),
+
+       TP_ARGS(q, bio)
+)
+
+DECLARE_EVENT_CLASS(block_get_rq,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
+
+       TP_ARGS(q, bio, rw),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev                     )
+               __field( sector_t,      sector                  )
+               __field( unsigned int,  nr_sector               )
+               __field( unsigned int,  rwbs                    )
+               __array( char,          comm,   TASK_COMM_LEN   )
+        ),
+
+       TP_fast_assign(
+               tp_assign(dev, bio ? bio->bi_bdev->bd_dev : 0)
+               tp_assign(sector, bio ? bio->bi_sector : 0)
+               tp_assign(nr_sector, bio ? bio->bi_size >> 9 : 0)
+               blk_fill_rwbs(rwbs, bio ? bio->bi_rw : 0,
+                             bio ? bio->bi_size >> 9 : 0)
+               tp_memcpy(comm, current->comm, TASK_COMM_LEN)
+        ),
+
+       TP_printk("%d,%d %s %llu + %u [%s]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __print_rwbs_flags(__entry->rwbs),
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->comm)
+)
+
+/**
+ * block_getrq - get a free request entry in queue for block IO operations
+ * @q: queue for operations
+ * @bio: pending block IO operation
+ * @rw: low bit indicates a read (%0) or a write (%1)
+ *
+ * A request struct for queue @q has been allocated to handle the
+ * block IO operation @bio.
+ */
+DEFINE_EVENT(block_get_rq, block_getrq,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
+
+       TP_ARGS(q, bio, rw)
+)
+
+/**
+ * block_sleeprq - waiting to get a free request entry in queue for block IO operation
+ * @q: queue for operation
+ * @bio: pending block IO operation
+ * @rw: low bit indicates a read (%0) or a write (%1)
+ *
+ * In the case where a request struct cannot be provided for queue @q
+ * the process needs to wait for a request struct to become
+ * available.  This tracepoint event is generated each time the
+ * process goes to sleep waiting for a request struct to become
+ * available.
+ */
+DEFINE_EVENT(block_get_rq, block_sleeprq,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
+
+       TP_ARGS(q, bio, rw)
+)
+
+/**
+ * block_plug - keep operations requests in request queue
+ * @q: request queue to plug
+ *
+ * Plug the request queue @q.  Do not allow block operation requests
+ * to be sent to the device driver. Instead, accumulate requests in
+ * the queue to improve throughput performance of the block device.
+ */
+TRACE_EVENT(block_plug,
+
+       TP_PROTO(struct request_queue *q),
+
+       TP_ARGS(q),
+
+       TP_STRUCT__entry(
+               __array( char,          comm,   TASK_COMM_LEN   )
+       ),
+
+       TP_fast_assign(
+               tp_memcpy(comm, current->comm, TASK_COMM_LEN)
+       ),
+
+       TP_printk("[%s]", __entry->comm)
+)
+
+DECLARE_EVENT_CLASS(block_unplug,
+
+       TP_PROTO(struct request_queue *q, unsigned int depth, bool explicit),
+
+       TP_ARGS(q, depth, explicit),
+
+       TP_STRUCT__entry(
+               __field( int,           nr_rq                   )
+               __array( char,          comm,   TASK_COMM_LEN   )
+       ),
+
+       TP_fast_assign(
+               tp_assign(nr_rq, depth)
+               tp_memcpy(comm, current->comm, TASK_COMM_LEN)
+       ),
+
+       TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
+)
+
+/**
+ * block_unplug - release of operations requests in request queue
+ * @q: request queue to unplug
+ * @depth: number of requests just added to the queue
+ * @explicit: whether this was an explicit unplug, or one from schedule()
+ *
+ * Unplug request queue @q because the device driver is scheduled to work
+ * on elements in the request queue.
+ */
+DEFINE_EVENT(block_unplug, block_unplug,
+
+       TP_PROTO(struct request_queue *q, unsigned int depth, bool explicit),
+
+       TP_ARGS(q, depth, explicit)
+)
+
+/**
+ * block_split - split a single bio struct into two bio structs
+ * @q: queue containing the bio
+ * @bio: block operation being split
+ * @new_sector: The starting sector for the new bio
+ *
+ * The bio request @bio in request queue @q needs to be split into two
+ * bio requests. The newly created @bio request starts at
+ * @new_sector. This split may be required due to hardware limitations
+ * such as operations crossing device boundaries in a RAID system.
+ */
+TRACE_EVENT(block_split,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio,
+                unsigned int new_sector),
+
+       TP_ARGS(q, bio, new_sector),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev                             )
+               __field( sector_t,      sector                          )
+               __field( sector_t,      new_sector                      )
+               __field( unsigned int,  rwbs            )
+               __array( char,          comm,           TASK_COMM_LEN   )
+       ),
+
+       TP_fast_assign(
+               tp_assign(dev, bio->bi_bdev->bd_dev)
+               tp_assign(sector, bio->bi_sector)
+               tp_assign(new_sector, new_sector)
+               blk_fill_rwbs(rwbs, bio->bi_rw, bio->bi_size)
+               tp_memcpy(comm, current->comm, TASK_COMM_LEN)
+       ),
+
+       TP_printk("%d,%d %s %llu / %llu [%s]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __print_rwbs_flags(__entry->rwbs),
+                 (unsigned long long)__entry->sector,
+                 (unsigned long long)__entry->new_sector,
+                 __entry->comm)
+)
+
+/**
+ * block_bio_remap - map request for a logical device to the raw device
+ * @q: queue holding the operation
+ * @bio: revised operation
+ * @dev: device for the operation
+ * @from: original sector for the operation
+ *
+ * An operation for a logical device has been mapped to the
+ * raw block device.
+ */
+TRACE_EVENT(block_bio_remap,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
+                sector_t from),
+
+       TP_ARGS(q, bio, dev, from),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev             )
+               __field( sector_t,      sector          )
+               __field( unsigned int,  nr_sector       )
+               __field( dev_t,         old_dev         )
+               __field( sector_t,      old_sector      )
+               __field( unsigned int,  rwbs            )
+       ),
+
+       TP_fast_assign(
+               tp_assign(dev, bio->bi_bdev->bd_dev)
+               tp_assign(sector, bio->bi_sector)
+               tp_assign(nr_sector, bio->bi_size >> 9)
+               tp_assign(old_dev, dev)
+               tp_assign(old_sector, from)
+               blk_fill_rwbs(rwbs, bio->bi_rw, bio->bi_size)
+       ),
+
+       TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __print_rwbs_flags(__entry->rwbs),
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector,
+                 MAJOR(__entry->old_dev), MINOR(__entry->old_dev),
+                 (unsigned long long)__entry->old_sector)
+)
+
+/**
+ * block_rq_remap - map request for a block operation request
+ * @q: queue holding the operation
+ * @rq: block IO operation request
+ * @dev: device for the operation
+ * @from: original sector for the operation
+ *
+ * The block operation request @rq in @q has been remapped.  The block
+ * operation request @rq holds the current information and @from holds
+ * the original sector.
+ */
+TRACE_EVENT(block_rq_remap,
+
+       TP_PROTO(struct request_queue *q, struct request *rq, dev_t dev,
+                sector_t from),
+
+       TP_ARGS(q, rq, dev, from),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev             )
+               __field( sector_t,      sector          )
+               __field( unsigned int,  nr_sector       )
+               __field( dev_t,         old_dev         )
+               __field( sector_t,      old_sector      )
+               __field( unsigned int,  rwbs            )
+       ),
+
+       TP_fast_assign(
+               tp_assign(dev, disk_devt(rq->rq_disk))
+               tp_assign(sector, blk_rq_pos(rq))
+               tp_assign(nr_sector, blk_rq_sectors(rq))
+               tp_assign(old_dev, dev)
+               tp_assign(old_sector, from)
+               blk_fill_rwbs(rwbs, rq->cmd_flags, blk_rq_bytes(rq))
+       ),
+
+       TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __print_rwbs_flags(__entry->rwbs),
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector,
+                 MAJOR(__entry->old_dev), MINOR(__entry->old_dev),
+                 (unsigned long long)__entry->old_sector)
+)
+
+#undef __print_rwbs_flags
+#undef blk_fill_rwbs
+
+#endif /* _TRACE_BLOCK_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+
diff --git a/instrumentation/events/lttng-patch/irq.h b/instrumentation/events/lttng-patch/irq.h
new file mode 100644 (file)
index 0000000..dd65797
--- /dev/null
@@ -0,0 +1,155 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM irq
+
+#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_IRQ_H
+
+#include <linux/tracepoint.h>
+
+#ifndef _TRACE_IRQ_DEF_
+#define _TRACE_IRQ_DEF_
+
+struct irqaction;
+struct softirq_action;
+
+#define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq }
+#define show_softirq_name(val)                         \
+       __print_symbolic(val,                           \
+                        softirq_name(HI),              \
+                        softirq_name(TIMER),           \
+                        softirq_name(NET_TX),          \
+                        softirq_name(NET_RX),          \
+                        softirq_name(BLOCK),           \
+                        softirq_name(BLOCK_IOPOLL),    \
+                        softirq_name(TASKLET),         \
+                        softirq_name(SCHED),           \
+                        softirq_name(HRTIMER),         \
+                        softirq_name(RCU))
+
+#endif /* _TRACE_IRQ_DEF_ */
+
+/**
+ * irq_handler_entry - called immediately before the irq action handler
+ * @irq: irq number
+ * @action: pointer to struct irqaction
+ *
+ * The struct irqaction pointed to by @action contains various
+ * information about the handler, including the device name,
+ * @action->name, and the device id, @action->dev_id. When used in
+ * conjunction with the irq_handler_exit tracepoint, we can figure
+ * out irq handler latencies.
+ */
+TRACE_EVENT(irq_handler_entry,
+
+       TP_PROTO(int irq, struct irqaction *action),
+
+       TP_ARGS(irq, action),
+
+       TP_STRUCT__entry(
+               __field(        int,    irq             )
+               __string(       name,   action->name    )
+       ),
+
+       TP_fast_assign(
+               tp_assign(irq, irq)
+               tp_strcpy(name, action->name)
+       ),
+
+       TP_printk("irq=%d name=%s", __entry->irq, __get_str(name))
+)
+
+/**
+ * irq_handler_exit - called immediately after the irq action handler returns
+ * @irq: irq number
+ * @action: pointer to struct irqaction
+ * @ret: return value
+ *
+ * If the @ret value is set to IRQ_HANDLED, then we know that the corresponding
+ * @action->handler successfully handled this irq. Otherwise, the irq might be
+ * a shared irq line, or the irq was not handled successfully. Can be used in
+ * conjunction with the irq_handler_entry to understand irq handler latencies.
+ */
+TRACE_EVENT(irq_handler_exit,
+
+       TP_PROTO(int irq, struct irqaction *action, int ret),
+
+       TP_ARGS(irq, action, ret),
+
+       TP_STRUCT__entry(
+               __field(        int,    irq     )
+               __field(        int,    ret     )
+       ),
+
+       TP_fast_assign(
+               tp_assign(irq, irq)
+               tp_assign(ret, ret)
+       ),
+
+       TP_printk("irq=%d ret=%s",
+                 __entry->irq, __entry->ret ? "handled" : "unhandled")
+)
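+
+/*
+ * Illustrative sketch (not compiled, guarded by #if 0): pairing the
+ * irq_handler_entry and irq_handler_exit tracepoints above to measure
+ * handler latency, as the comments suggest.  The probe names, the per-cpu
+ * variable and the use of trace_printk() are examples only; the
+ * register_trace_irq_handler_entry()/_exit() helpers are generated by the
+ * tracepoint machinery.
+ */
+#if 0
+static DEFINE_PER_CPU(s64, example_irq_entry_ns);
+
+static void example_irq_entry(void *data, int irq, struct irqaction *action)
+{
+        per_cpu(example_irq_entry_ns, smp_processor_id()) =
+                        ktime_to_ns(ktime_get());
+}
+
+static void example_irq_exit(void *data, int irq, struct irqaction *action,
+                             int ret)
+{
+        s64 delta = ktime_to_ns(ktime_get()) -
+                        per_cpu(example_irq_entry_ns, smp_processor_id());
+
+        trace_printk("irq=%d handler latency=%lld ns\n", irq, delta);
+}
+
+/*
+ * register_trace_irq_handler_entry(example_irq_entry, NULL) and
+ * register_trace_irq_handler_exit(example_irq_exit, NULL) would then be
+ * called from module init.
+ */
+#endif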
+
+DECLARE_EVENT_CLASS(softirq,
+
+       TP_PROTO(unsigned int vec_nr),
+
+       TP_ARGS(vec_nr),
+
+       TP_STRUCT__entry(
+               __field(        unsigned int,   vec     )
+       ),
+
+       TP_fast_assign(
+               tp_assign(vec, vec_nr)
+       ),
+
+       TP_printk("vec=%u [action=%s]", __entry->vec,
+                 show_softirq_name(__entry->vec))
+)
+
+/**
+ * softirq_entry - called immediately before the softirq handler
+ * @vec_nr:  softirq vector number
+ *
+ * When used in combination with the softirq_exit tracepoint
+ * we can determine the softirq handler runtime.
+ */
+DEFINE_EVENT(softirq, softirq_entry,
+
+       TP_PROTO(unsigned int vec_nr),
+
+       TP_ARGS(vec_nr)
+)
+
+/**
+ * softirq_exit - called immediately after the softirq handler returns
+ * @vec_nr:  softirq vector number
+ *
+ * When used in combination with the softirq_entry tracepoint
+ * we can determine the softirq handler runtime.
+ */
+DEFINE_EVENT(softirq, softirq_exit,
+
+       TP_PROTO(unsigned int vec_nr),
+
+       TP_ARGS(vec_nr)
+)
+
+/**
+ * softirq_raise - called immediately when a softirq is raised
+ * @vec_nr:  softirq vector number
+ *
+ * When used in combination with the softirq_entry tracepoint
+ * we can determine the softirq raise to run latency.
+ */
+DEFINE_EVENT(softirq, softirq_raise,
+
+       TP_PROTO(unsigned int vec_nr),
+
+       TP_ARGS(vec_nr)
+)
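+
+/*
+ * Illustrative sketch (not compiled, guarded by #if 0): pairing softirq_raise
+ * and softirq_entry, as the comments above suggest, to estimate raise-to-run
+ * latency per vector.  The names and the NR_SOFTIRQS-sized array are
+ * examples only.
+ */
+#if 0
+static s64 example_raise_ns[NR_SOFTIRQS];
+
+static void example_softirq_raise(void *data, unsigned int vec_nr)
+{
+        example_raise_ns[vec_nr] = ktime_to_ns(ktime_get());
+}
+
+static void example_softirq_entry(void *data, unsigned int vec_nr)
+{
+        s64 delta = ktime_to_ns(ktime_get()) - example_raise_ns[vec_nr];
+
+        trace_printk("softirq %u raise-to-run %lld ns\n", vec_nr, delta);
+}
+#endif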
+
+#endif /*  _TRACE_IRQ_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/instrumentation/events/lttng-patch/kvm.h b/instrumentation/events/lttng-patch/kvm.h
new file mode 100644 (file)
index 0000000..d7cc964
--- /dev/null
@@ -0,0 +1,312 @@
+#if !defined(_TRACE_KVM_MAIN_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_MAIN_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+
+#define ERSN(x) { KVM_EXIT_##x, "KVM_EXIT_" #x }
+
+#define kvm_trace_exit_reason                                          \
+       ERSN(UNKNOWN), ERSN(EXCEPTION), ERSN(IO), ERSN(HYPERCALL),      \
+       ERSN(DEBUG), ERSN(HLT), ERSN(MMIO), ERSN(IRQ_WINDOW_OPEN),      \
+       ERSN(SHUTDOWN), ERSN(FAIL_ENTRY), ERSN(INTR), ERSN(SET_TPR),    \
+       ERSN(TPR_ACCESS), ERSN(S390_SIEIC), ERSN(S390_RESET), ERSN(DCR),\
+       ERSN(NMI), ERSN(INTERNAL_ERROR), ERSN(OSI)
+
+TRACE_EVENT(kvm_userspace_exit,
+           TP_PROTO(__u32 reason, int errno),
+           TP_ARGS(reason, errno),
+
+       TP_STRUCT__entry(
+               __field(        __u32,          reason          )
+               __field(        int,            errno           )
+       ),
+
+       TP_fast_assign(
+               tp_assign(reason, reason)
+               tp_assign(errno, errno)
+       ),
+
+       TP_printk("reason %s (%d)",
+                 __entry->errno < 0 ?
+                 (__entry->errno == -EINTR ? "restart" : "error") :
+                 __print_symbolic(__entry->reason, kvm_trace_exit_reason),
+                 __entry->errno < 0 ? -__entry->errno : __entry->reason)
+)
+
+#if defined(__KVM_HAVE_IOAPIC)
+TRACE_EVENT(kvm_set_irq,
+       TP_PROTO(unsigned int gsi, int level, int irq_source_id),
+       TP_ARGS(gsi, level, irq_source_id),
+
+       TP_STRUCT__entry(
+               __field(        unsigned int,   gsi             )
+               __field(        int,            level           )
+               __field(        int,            irq_source_id   )
+       ),
+
+       TP_fast_assign(
+               tp_assign(gsi, gsi)
+               tp_assign(level, level)
+               tp_assign(irq_source_id, irq_source_id)
+       ),
+
+       TP_printk("gsi %u level %d source %d",
+                 __entry->gsi, __entry->level, __entry->irq_source_id)
+)
+
+#define kvm_deliver_mode               \
+       {0x0, "Fixed"},                 \
+       {0x1, "LowPrio"},               \
+       {0x2, "SMI"},                   \
+       {0x3, "Res3"},                  \
+       {0x4, "NMI"},                   \
+       {0x5, "INIT"},                  \
+       {0x6, "SIPI"},                  \
+       {0x7, "ExtINT"}
+
+TRACE_EVENT(kvm_ioapic_set_irq,
+           TP_PROTO(__u64 e, int pin, bool coalesced),
+           TP_ARGS(e, pin, coalesced),
+
+       TP_STRUCT__entry(
+               __field(        __u64,          e               )
+               __field(        int,            pin             )
+               __field(        bool,           coalesced       )
+       ),
+
+       TP_fast_assign(
+               tp_assign(e, e)
+               tp_assign(pin, pin)
+               tp_assign(coalesced, coalesced)
+       ),
+
+       TP_printk("pin %u dst %x vec=%u (%s|%s|%s%s)%s",
+                 __entry->pin, (u8)(__entry->e >> 56), (u8)__entry->e,
+                 __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode),
+                 (__entry->e & (1<<11)) ? "logical" : "physical",
+                 (__entry->e & (1<<15)) ? "level" : "edge",
+                 (__entry->e & (1<<16)) ? "|masked" : "",
+                 __entry->coalesced ? " (coalesced)" : "")
+)
+
+TRACE_EVENT(kvm_msi_set_irq,
+           TP_PROTO(__u64 address, __u64 data),
+           TP_ARGS(address, data),
+
+       TP_STRUCT__entry(
+               __field(        __u64,          address         )
+               __field(        __u64,          data            )
+       ),
+
+       TP_fast_assign(
+               tp_assign(address, address)
+               tp_assign(data, data)
+       ),
+
+       TP_printk("dst %u vec %x (%s|%s|%s%s)",
+                 (u8)(__entry->address >> 12), (u8)__entry->data,
+                 __print_symbolic((__entry->data >> 8 & 0x7), kvm_deliver_mode),
+                 (__entry->address & (1<<2)) ? "logical" : "physical",
+                 (__entry->data & (1<<15)) ? "level" : "edge",
+                 (__entry->address & (1<<3)) ? "|rh" : "")
+)
+
+#define kvm_irqchips                                           \
+       {KVM_IRQCHIP_PIC_MASTER,        "PIC master"},          \
+       {KVM_IRQCHIP_PIC_SLAVE,         "PIC slave"},           \
+       {KVM_IRQCHIP_IOAPIC,            "IOAPIC"}
+
+TRACE_EVENT(kvm_ack_irq,
+       TP_PROTO(unsigned int irqchip, unsigned int pin),
+       TP_ARGS(irqchip, pin),
+
+       TP_STRUCT__entry(
+               __field(        unsigned int,   irqchip         )
+               __field(        unsigned int,   pin             )
+       ),
+
+       TP_fast_assign(
+               tp_assign(irqchip, irqchip)
+               tp_assign(pin, pin)
+       ),
+
+       TP_printk("irqchip %s pin %u",
+                 __print_symbolic(__entry->irqchip, kvm_irqchips),
+                __entry->pin)
+)
+
+
+
+#endif /* defined(__KVM_HAVE_IOAPIC) */
+
+#define KVM_TRACE_MMIO_READ_UNSATISFIED 0
+#define KVM_TRACE_MMIO_READ 1
+#define KVM_TRACE_MMIO_WRITE 2
+
+#define kvm_trace_symbol_mmio \
+       { KVM_TRACE_MMIO_READ_UNSATISFIED, "unsatisfied-read" }, \
+       { KVM_TRACE_MMIO_READ, "read" }, \
+       { KVM_TRACE_MMIO_WRITE, "write" }
+
+TRACE_EVENT(kvm_mmio,
+       TP_PROTO(int type, int len, u64 gpa, u64 val),
+       TP_ARGS(type, len, gpa, val),
+
+       TP_STRUCT__entry(
+               __field(        u32,    type            )
+               __field(        u32,    len             )
+               __field(        u64,    gpa             )
+               __field(        u64,    val             )
+       ),
+
+       TP_fast_assign(
+               tp_assign(type, type)
+               tp_assign(len, len)
+               tp_assign(gpa, gpa)
+               tp_assign(val, val)
+       ),
+
+       TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx",
+                 __print_symbolic(__entry->type, kvm_trace_symbol_mmio),
+                 __entry->len, __entry->gpa, __entry->val)
+)
+
+#define kvm_fpu_load_symbol    \
+       {0, "unload"},          \
+       {1, "load"}
+
+TRACE_EVENT(kvm_fpu,
+       TP_PROTO(int load),
+       TP_ARGS(load),
+
+       TP_STRUCT__entry(
+               __field(        u32,            load            )
+       ),
+
+       TP_fast_assign(
+               tp_assign(load, load)
+       ),
+
+       TP_printk("%s", __print_symbolic(__entry->load, kvm_fpu_load_symbol))
+)
+
+TRACE_EVENT(kvm_age_page,
+       TP_PROTO(ulong hva, struct kvm_memory_slot *slot, int ref),
+       TP_ARGS(hva, slot, ref),
+
+       TP_STRUCT__entry(
+               __field(        u64,    hva             )
+               __field(        u64,    gfn             )
+               __field(        u8,     referenced      )
+       ),
+
+       TP_fast_assign(
+               tp_assign(hva, hva)
+               tp_assign(gfn,
+                 slot->base_gfn + ((hva - slot->userspace_addr) >> PAGE_SHIFT))
+               tp_assign(referenced, ref)
+       ),
+
+       TP_printk("hva %llx gfn %llx %s",
+                 __entry->hva, __entry->gfn,
+                 __entry->referenced ? "YOUNG" : "OLD")
+)
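+
+/*
+ * Illustrative sketch (not compiled, guarded by #if 0) of the gfn
+ * computation used in kvm_age_page above: the guest frame number is the
+ * slot's base gfn plus the page index of @hva within the slot's userspace
+ * mapping.  The helper name and the worked numbers are examples only.
+ */
+#if 0
+static inline u64 example_hva_to_gfn(u64 hva,
+                                     const struct kvm_memory_slot *slot)
+{
+        /*
+         * e.g. base_gfn = 0x1000, userspace_addr = 0x7f0000000000,
+         * hva = 0x7f0000003000, PAGE_SHIFT = 12  =>  gfn = 0x1003.
+         */
+        return slot->base_gfn + ((hva - slot->userspace_addr) >> PAGE_SHIFT);
+}
+#endif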
+
+#ifdef CONFIG_KVM_ASYNC_PF
+DECLARE_EVENT_CLASS(kvm_async_get_page_class,
+
+       TP_PROTO(u64 gva, u64 gfn),
+
+       TP_ARGS(gva, gfn),
+
+       TP_STRUCT__entry(
+               __field(__u64, gva)
+               __field(u64, gfn)
+       ),
+
+       TP_fast_assign(
+               tp_assign(gva, gva)
+               tp_assign(gfn, gfn)
+       ),
+
+       TP_printk("gva = %#llx, gfn = %#llx", __entry->gva, __entry->gfn)
+)
+
+DEFINE_EVENT(kvm_async_get_page_class, kvm_try_async_get_page,
+
+       TP_PROTO(u64 gva, u64 gfn),
+
+       TP_ARGS(gva, gfn)
+)
+
+DEFINE_EVENT(kvm_async_get_page_class, kvm_async_pf_doublefault,
+
+       TP_PROTO(u64 gva, u64 gfn),
+
+       TP_ARGS(gva, gfn)
+)
+
+DECLARE_EVENT_CLASS(kvm_async_pf_nopresent_ready,
+
+       TP_PROTO(u64 token, u64 gva),
+
+       TP_ARGS(token, gva),
+
+       TP_STRUCT__entry(
+               __field(__u64, token)
+               __field(__u64, gva)
+       ),
+
+       TP_fast_assign(
+               tp_assign(token, token)
+               tp_assign(gva, gva)
+       ),
+
+       TP_printk("token %#llx gva %#llx", __entry->token, __entry->gva)
+
+)
+
+DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_not_present,
+
+       TP_PROTO(u64 token, u64 gva),
+
+       TP_ARGS(token, gva)
+)
+
+DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_ready,
+
+       TP_PROTO(u64 token, u64 gva),
+
+       TP_ARGS(token, gva)
+)
+
+TRACE_EVENT(
+       kvm_async_pf_completed,
+       TP_PROTO(unsigned long address, struct page *page, u64 gva),
+       TP_ARGS(address, page, gva),
+
+       TP_STRUCT__entry(
+               __field(unsigned long, address)
+               __field(pfn_t, pfn)
+               __field(u64, gva)
+               ),
+
+       TP_fast_assign(
+               tp_assign(address, address)
+               tp_assign(pfn, page ? page_to_pfn(page) : 0)
+               tp_assign(gva, gva)
+               ),
+
+       TP_printk("gva %#llx address %#lx pfn %#llx",  __entry->gva,
+                 __entry->address, __entry->pfn)
+)
+
+#endif
+
+#endif /* _TRACE_KVM_MAIN_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/instrumentation/events/lttng-patch/sched.h b/instrumentation/events/lttng-patch/sched.h
new file mode 100644 (file)
index 0000000..2e9ac2f
--- /dev/null
@@ -0,0 +1,402 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM sched
+
+#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SCHED_H
+
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+#ifndef _TRACE_SCHED_DEF_
+#define _TRACE_SCHED_DEF_
+
+#ifdef CREATE_TRACE_POINTS
+static inline long __trace_sched_switch_state(struct task_struct *p)
+{
+       long state = p->state;
+
+#ifdef CONFIG_PREEMPT
+       /*
+        * For all intents and purposes a preempted task is a running task.
+        */
+       if (task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)
+               state = TASK_RUNNING;
+#endif
+
+       return state;
+}
+#endif
+
+#endif /* _TRACE_SCHED_DEF_ */
+
+/*
+ * Tracepoint for calling kthread_stop, performed to end a kthread:
+ */
+TRACE_EVENT(sched_kthread_stop,
+
+       TP_PROTO(struct task_struct *t),
+
+       TP_ARGS(t),
+
+       TP_STRUCT__entry(
+               __array(        char,   comm,   TASK_COMM_LEN   )
+               __field(        pid_t,  pid                     )
+       ),
+
+       TP_fast_assign(
+               tp_memcpy(comm, t->comm, TASK_COMM_LEN)
+               tp_assign(pid, t->pid)
+       ),
+
+       TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
+)
+
+/*
+ * Tracepoint for the return value of the kthread stopping:
+ */
+TRACE_EVENT(sched_kthread_stop_ret,
+
+       TP_PROTO(int ret),
+
+       TP_ARGS(ret),
+
+       TP_STRUCT__entry(
+               __field(        int,    ret     )
+       ),
+
+       TP_fast_assign(
+               tp_assign(ret, ret)
+       ),
+
+       TP_printk("ret=%d", __entry->ret)
+)
+
+/*
+ * Tracepoint for waking up a task:
+ */
+DECLARE_EVENT_CLASS(sched_wakeup_template,
+
+       TP_PROTO(struct task_struct *p, int success),
+
+       TP_ARGS(p, success),
+
+       TP_STRUCT__entry(
+               __array(        char,   comm,   TASK_COMM_LEN   )
+               __field(        pid_t,  pid                     )
+               __field(        int,    prio                    )
+               __field(        int,    success                 )
+               __field(        int,    target_cpu              )
+       ),
+
+       TP_fast_assign(
+               tp_memcpy(comm, p->comm, TASK_COMM_LEN)
+               tp_assign(pid, p->pid)
+               tp_assign(prio, p->prio)
+               tp_assign(success, success)
+               tp_assign(target_cpu, task_cpu(p))
+       ),
+
+       TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d",
+                 __entry->comm, __entry->pid, __entry->prio,
+                 __entry->success, __entry->target_cpu)
+)
+
+DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
+            TP_PROTO(struct task_struct *p, int success),
+            TP_ARGS(p, success))
+
+/*
+ * Tracepoint for waking up a new task:
+ */
+DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
+            TP_PROTO(struct task_struct *p, int success),
+            TP_ARGS(p, success))
+
+/*
+ * Tracepoint for task switches, performed by the scheduler:
+ */
+TRACE_EVENT(sched_switch,
+
+       TP_PROTO(struct task_struct *prev,
+                struct task_struct *next),
+
+       TP_ARGS(prev, next),
+
+       TP_STRUCT__entry(
+               __array(        char,   prev_comm,      TASK_COMM_LEN   )
+               __field(        pid_t,  prev_pid                        )
+               __field(        int,    prev_prio                       )
+               __field(        long,   prev_state                      )
+               __array(        char,   next_comm,      TASK_COMM_LEN   )
+               __field(        pid_t,  next_pid                        )
+               __field(        int,    next_prio                       )
+       ),
+
+       TP_fast_assign(
+               tp_memcpy(next_comm, next->comm, TASK_COMM_LEN)
+               tp_assign(prev_pid, prev->pid)
+               tp_assign(prev_prio, prev->prio)
+               tp_assign(prev_state, __trace_sched_switch_state(prev))
+               tp_memcpy(prev_comm, prev->comm, TASK_COMM_LEN)
+               tp_assign(next_pid, next->pid)
+               tp_assign(next_prio, next->prio)
+       ),
+
+       TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s ==> next_comm=%s next_pid=%d next_prio=%d",
+               __entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
+               __entry->prev_state ?
+                 __print_flags(__entry->prev_state, "|",
+                               { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
+                               { 16, "Z" }, { 32, "X" }, { 64, "x" },
+                               { 128, "W" }) : "R",
+               __entry->next_comm, __entry->next_pid, __entry->next_prio)
+)
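+
+/*
+ * Illustrative sketch (not compiled, guarded by #if 0): attaching a probe to
+ * the sched_switch tracepoint defined above.  The probe name and the use of
+ * trace_printk() are examples only; register_trace_sched_switch() is
+ * generated by the tracepoint machinery from TP_PROTO.
+ */
+#if 0
+static void example_probe_sched_switch(void *data, struct task_struct *prev,
+                                       struct task_struct *next)
+{
+        /* Runs with the runqueue lock held; keep it short and non-sleeping. */
+        trace_printk("switch %s[%d] -> %s[%d]\n",
+                     prev->comm, prev->pid, next->comm, next->pid);
+}
+
+/* register_trace_sched_switch(example_probe_sched_switch, NULL); */
+#endif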
+
+/*
+ * Tracepoint for a task being migrated:
+ */
+TRACE_EVENT(sched_migrate_task,
+
+       TP_PROTO(struct task_struct *p, int dest_cpu),
+
+       TP_ARGS(p, dest_cpu),
+
+       TP_STRUCT__entry(
+               __array(        char,   comm,   TASK_COMM_LEN   )
+               __field(        pid_t,  pid                     )
+               __field(        int,    prio                    )
+               __field(        int,    orig_cpu                )
+               __field(        int,    dest_cpu                )
+       ),
+
+       TP_fast_assign(
+               tp_memcpy(comm, p->comm, TASK_COMM_LEN)
+               tp_assign(pid, p->pid)
+               tp_assign(prio, p->prio)
+               tp_assign(orig_cpu, task_cpu(p))
+               tp_assign(dest_cpu, dest_cpu)
+       ),
+
+       TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d",
+                 __entry->comm, __entry->pid, __entry->prio,
+                 __entry->orig_cpu, __entry->dest_cpu)
+)
+
+DECLARE_EVENT_CLASS(sched_process_template,
+
+       TP_PROTO(struct task_struct *p),
+
+       TP_ARGS(p),
+
+       TP_STRUCT__entry(
+               __array(        char,   comm,   TASK_COMM_LEN   )
+               __field(        pid_t,  pid                     )
+               __field(        int,    prio                    )
+       ),
+
+       TP_fast_assign(
+               tp_memcpy(comm, p->comm, TASK_COMM_LEN)
+               tp_assign(pid, p->pid)
+               tp_assign(prio, p->prio)
+       ),
+
+       TP_printk("comm=%s pid=%d prio=%d",
+                 __entry->comm, __entry->pid, __entry->prio)
+)
+
+/*
+ * Tracepoint for freeing a task:
+ */
+DEFINE_EVENT(sched_process_template, sched_process_free,
+            TP_PROTO(struct task_struct *p),
+            TP_ARGS(p))
+
+/*
+ * Tracepoint for a task exiting:
+ */
+DEFINE_EVENT(sched_process_template, sched_process_exit,
+            TP_PROTO(struct task_struct *p),
+            TP_ARGS(p))
+
+/*
+ * Tracepoint for waiting on task to unschedule:
+ */
+DEFINE_EVENT(sched_process_template, sched_wait_task,
+       TP_PROTO(struct task_struct *p),
+       TP_ARGS(p))
+
+/*
+ * Tracepoint for a waiting task:
+ */
+TRACE_EVENT(sched_process_wait,
+
+       TP_PROTO(struct pid *pid),
+
+       TP_ARGS(pid),
+
+       TP_STRUCT__entry(
+               __array(        char,   comm,   TASK_COMM_LEN   )
+               __field(        pid_t,  pid                     )
+               __field(        int,    prio                    )
+       ),
+
+       TP_fast_assign(
+               tp_memcpy(comm, current->comm, TASK_COMM_LEN)
+               tp_assign(pid, pid_nr(pid))
+               tp_assign(prio, current->prio)
+       ),
+
+       TP_printk("comm=%s pid=%d prio=%d",
+                 __entry->comm, __entry->pid, __entry->prio)
+)
+
+/*
+ * Tracepoint for do_fork:
+ */
+TRACE_EVENT(sched_process_fork,
+
+       TP_PROTO(struct task_struct *parent, struct task_struct *child),
+
+       TP_ARGS(parent, child),
+
+       TP_STRUCT__entry(
+               __array(        char,   parent_comm,    TASK_COMM_LEN   )
+               __field(        pid_t,  parent_pid                      )
+               __array(        char,   child_comm,     TASK_COMM_LEN   )
+               __field(        pid_t,  child_pid                       )
+       ),
+
+       TP_fast_assign(
+               tp_memcpy(parent_comm, parent->comm, TASK_COMM_LEN)
+               tp_assign(parent_pid, parent->pid)
+               tp_memcpy(child_comm, child->comm, TASK_COMM_LEN)
+               tp_assign(child_pid, child->pid)
+       ),
+
+       TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d",
+               __entry->parent_comm, __entry->parent_pid,
+               __entry->child_comm, __entry->child_pid)
+)
+
+/*
+ * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE;
+ *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
+ */
+DECLARE_EVENT_CLASS(sched_stat_template,
+
+       TP_PROTO(struct task_struct *tsk, u64 delay),
+
+       TP_ARGS(tsk, delay),
+
+       TP_STRUCT__entry(
+               __array( char,  comm,   TASK_COMM_LEN   )
+               __field( pid_t, pid                     )
+               __field( u64,   delay                   )
+       ),
+
+       TP_fast_assign(
+               tp_memcpy(comm, tsk->comm, TASK_COMM_LEN)
+               tp_assign(pid,  tsk->pid)
+               tp_assign(delay, delay)
+       )
+       TP_perf_assign(
+               __perf_count(delay)
+       ),
+
+       TP_printk("comm=%s pid=%d delay=%Lu [ns]",
+                       __entry->comm, __entry->pid,
+                       (unsigned long long)__entry->delay)
+)
+
+
+/*
+ * Tracepoint for accounting wait time (time the task is runnable
+ * but not actually running due to scheduler contention).
+ */
+DEFINE_EVENT(sched_stat_template, sched_stat_wait,
+            TP_PROTO(struct task_struct *tsk, u64 delay),
+            TP_ARGS(tsk, delay))
+
+/*
+ * Tracepoint for accounting sleep time (time the task is not runnable,
+ * including iowait, see below).
+ */
+DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
+            TP_PROTO(struct task_struct *tsk, u64 delay),
+            TP_ARGS(tsk, delay))
+
+/*
+ * Tracepoint for accounting iowait time (time the task is not runnable
+ * due to waiting on IO to complete).
+ */
+DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
+            TP_PROTO(struct task_struct *tsk, u64 delay),
+            TP_ARGS(tsk, delay))
+
+/*
+ * Tracepoint for accounting runtime (time the task is executing
+ * on a CPU).
+ */
+TRACE_EVENT(sched_stat_runtime,
+
+       TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
+
+       TP_ARGS(tsk, runtime, vruntime),
+
+       TP_STRUCT__entry(
+               __array( char,  comm,   TASK_COMM_LEN   )
+               __field( pid_t, pid                     )
+               __field( u64,   runtime                 )
+               __field( u64,   vruntime                        )
+       ),
+
+       TP_fast_assign(
+               tp_memcpy(comm, tsk->comm, TASK_COMM_LEN)
+               tp_assign(pid, tsk->pid)
+               tp_assign(runtime, runtime)
+               tp_assign(vruntime, vruntime)
+       )
+       TP_perf_assign(
+               __perf_count(runtime)
+       ),
+
+       TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]",
+                       __entry->comm, __entry->pid,
+                       (unsigned long long)__entry->runtime,
+                       (unsigned long long)__entry->vruntime)
+)
+
+/*
+ * Tracepoint for showing priority inheritance modifying a task's
+ * priority.
+ */
+TRACE_EVENT(sched_pi_setprio,
+
+       TP_PROTO(struct task_struct *tsk, int newprio),
+
+       TP_ARGS(tsk, newprio),
+
+       TP_STRUCT__entry(
+               __array( char,  comm,   TASK_COMM_LEN   )
+               __field( pid_t, pid                     )
+               __field( int,   oldprio                 )
+               __field( int,   newprio                 )
+       ),
+
+       TP_fast_assign(
+               tp_memcpy(comm, tsk->comm, TASK_COMM_LEN)
+               tp_assign(pid, tsk->pid)
+               tp_assign(oldprio, tsk->prio)
+               tp_assign(newprio, newprio)
+       ),
+
+       TP_printk("comm=%s pid=%d oldprio=%d newprio=%d",
+                       __entry->comm, __entry->pid,
+                       __entry->oldprio, __entry->newprio)
+)
+
+#endif /* _TRACE_SCHED_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/instrumentation/events/mainline/block.h b/instrumentation/events/mainline/block.h
new file mode 100644 (file)
index 0000000..bf36654
--- /dev/null
@@ -0,0 +1,569 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM block
+
+#if !defined(_TRACE_BLOCK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_BLOCK_H
+
+#include <linux/blktrace_api.h>
+#include <linux/blkdev.h>
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(block_rq_with_error,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq),
+
+       TP_STRUCT__entry(
+               __field(  dev_t,        dev                     )
+               __field(  sector_t,     sector                  )
+               __field(  unsigned int, nr_sector               )
+               __field(  int,          errors                  )
+               __array(  char,         rwbs,   6               )
+               __dynamic_array( char,  cmd,    blk_cmd_buf_len(rq)     )
+       ),
+
+       TP_fast_assign(
+               __entry->dev       = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+               __entry->sector    = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                                       0 : blk_rq_pos(rq);
+               __entry->nr_sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                                       0 : blk_rq_sectors(rq);
+               __entry->errors    = rq->errors;
+
+               blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
+               blk_dump_cmd(__get_str(cmd), rq);
+       ),
+
+       TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->rwbs, __get_str(cmd),
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->errors)
+);
+
+/**
+ * block_rq_abort - abort block operation request
+ * @q: queue containing the block operation request
+ * @rq: block IO operation request
+ *
+ * Called immediately after pending block IO operation request @rq in
+ * queue @q is aborted. The fields in the operation request @rq
+ * can be examined to determine which device and sectors the pending
+ * operation would access.
+ */
+DEFINE_EVENT(block_rq_with_error, block_rq_abort,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq)
+);
+
+/**
+ * block_rq_requeue - place block IO request back on a queue
+ * @q: queue holding operation
+ * @rq: block IO operation request
+ *
+ * The block operation request @rq is being placed back into queue
+ * @q.  For some reason the request was not completed and needs to be
+ * put back in the queue.
+ */
+DEFINE_EVENT(block_rq_with_error, block_rq_requeue,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq)
+);
+
+/**
+ * block_rq_complete - block IO operation completed by device driver
+ * @q: queue containing the block operation request
+ * @rq: block operations request
+ *
+ * The block_rq_complete tracepoint event indicates that some portion
+ * of the operation request has been completed by the device driver.  If
+ * the @rq->bio is %NULL, then there is absolutely no additional work to
+ * do for the request. If @rq->bio is non-NULL then there is
+ * additional work required to complete the request.
+ */
+DEFINE_EVENT(block_rq_with_error, block_rq_complete,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq)
+);
+
+DECLARE_EVENT_CLASS(block_rq,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq),
+
+       TP_STRUCT__entry(
+               __field(  dev_t,        dev                     )
+               __field(  sector_t,     sector                  )
+               __field(  unsigned int, nr_sector               )
+               __field(  unsigned int, bytes                   )
+               __array(  char,         rwbs,   6               )
+               __array(  char,         comm,   TASK_COMM_LEN   )
+               __dynamic_array( char,  cmd,    blk_cmd_buf_len(rq)     )
+       ),
+
+       TP_fast_assign(
+               __entry->dev       = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+               __entry->sector    = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                                       0 : blk_rq_pos(rq);
+               __entry->nr_sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                                       0 : blk_rq_sectors(rq);
+               __entry->bytes     = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+                                       blk_rq_bytes(rq) : 0;
+
+               blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
+               blk_dump_cmd(__get_str(cmd), rq);
+               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+       ),
+
+       TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->rwbs, __entry->bytes, __get_str(cmd),
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->comm)
+);
+
+/**
+ * block_rq_insert - insert block operation request into queue
+ * @q: target queue
+ * @rq: block IO operation request
+ *
+ * Called immediately before block operation request @rq is inserted
+ * into queue @q.  The fields in the operation request @rq struct can
+ * be examined to determine which device and sectors the pending
+ * operation would access.
+ */
+DEFINE_EVENT(block_rq, block_rq_insert,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq)
+);
+
+/**
+ * block_rq_issue - issue pending block IO request operation to device driver
+ * @q: queue holding operation
+ * @rq: block IO operation operation request
+ *
+ * Called when block operation request @rq from queue @q is sent to a
+ * device driver for processing.
+ */
+DEFINE_EVENT(block_rq, block_rq_issue,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq)
+);
+
+/**
+ * block_bio_bounce - used bounce buffer when processing block operation
+ * @q: queue holding the block operation
+ * @bio: block operation
+ *
+ * A bounce buffer was used to handle the block operation @bio in @q.
+ * This occurs when hardware limitations prevent a direct transfer of
+ * data between the @bio data memory area and the IO device.  Use of a
+ * bounce buffer requires extra copying of data and decreases
+ * performance.
+ */
+TRACE_EVENT(block_bio_bounce,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio),
+
+       TP_ARGS(q, bio),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev                     )
+               __field( sector_t,      sector                  )
+               __field( unsigned int,  nr_sector               )
+               __array( char,          rwbs,   6               )
+               __array( char,          comm,   TASK_COMM_LEN   )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = bio->bi_bdev ?
+                                         bio->bi_bdev->bd_dev : 0;
+               __entry->sector         = bio->bi_sector;
+               __entry->nr_sector      = bio->bi_size >> 9;
+               blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+       ),
+
+       TP_printk("%d,%d %s %llu + %u [%s]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->comm)
+);
+
+/**
+ * block_bio_complete - completed all work on the block operation
+ * @q: queue holding the block operation
+ * @bio: block operation completed
+ * @error: io error value
+ *
+ * This tracepoint indicates there is no further work to do on this
+ * block IO operation @bio.
+ */
+TRACE_EVENT(block_bio_complete,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio, int error),
+
+       TP_ARGS(q, bio, error),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev             )
+               __field( sector_t,      sector          )
+               __field( unsigned,      nr_sector       )
+               __field( int,           error           )
+               __array( char,          rwbs,   6       )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = bio->bi_bdev->bd_dev;
+               __entry->sector         = bio->bi_sector;
+               __entry->nr_sector      = bio->bi_size >> 9;
+               __entry->error          = error;
+               blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+       ),
+
+       TP_printk("%d,%d %s %llu + %u [%d]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->error)
+);
+
+DECLARE_EVENT_CLASS(block_bio,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio),
+
+       TP_ARGS(q, bio),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev                     )
+               __field( sector_t,      sector                  )
+               __field( unsigned int,  nr_sector               )
+               __array( char,          rwbs,   6               )
+               __array( char,          comm,   TASK_COMM_LEN   )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = bio->bi_bdev->bd_dev;
+               __entry->sector         = bio->bi_sector;
+               __entry->nr_sector      = bio->bi_size >> 9;
+               blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+       ),
+
+       TP_printk("%d,%d %s %llu + %u [%s]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->comm)
+);
+
+/**
+ * block_bio_backmerge - merging block operation to the end of an existing operation
+ * @q: queue holding operation
+ * @bio: new block operation to merge
+ *
+ * Merging block request @bio to the end of an existing block request
+ * in queue @q.
+ */
+DEFINE_EVENT(block_bio, block_bio_backmerge,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio),
+
+       TP_ARGS(q, bio)
+);
+
+/**
+ * block_bio_frontmerge - merging block operation to the beginning of an existing operation
+ * @q: queue holding operation
+ * @bio: new block operation to merge
+ *
+ * Merging block IO operation @bio to the beginning of an existing block
+ * operation in queue @q.
+ */
+DEFINE_EVENT(block_bio, block_bio_frontmerge,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio),
+
+       TP_ARGS(q, bio)
+);
+
+/**
+ * block_bio_queue - putting new block IO operation in queue
+ * @q: queue holding operation
+ * @bio: new block operation
+ *
+ * About to place the block IO operation @bio into queue @q.
+ */
+DEFINE_EVENT(block_bio, block_bio_queue,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio),
+
+       TP_ARGS(q, bio)
+);
+
+DECLARE_EVENT_CLASS(block_get_rq,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
+
+       TP_ARGS(q, bio, rw),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev                     )
+               __field( sector_t,      sector                  )
+               __field( unsigned int,  nr_sector               )
+               __array( char,          rwbs,   6               )
+               __array( char,          comm,   TASK_COMM_LEN   )
+        ),
+
+       TP_fast_assign(
+               __entry->dev            = bio ? bio->bi_bdev->bd_dev : 0;
+               __entry->sector         = bio ? bio->bi_sector : 0;
+               __entry->nr_sector      = bio ? bio->bi_size >> 9 : 0;
+               blk_fill_rwbs(__entry->rwbs,
+                             bio ? bio->bi_rw : 0, __entry->nr_sector);
+               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+        ),
+
+       TP_printk("%d,%d %s %llu + %u [%s]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->comm)
+);
+
+/**
+ * block_getrq - get a free request entry in queue for block IO operations
+ * @q: queue for operations
+ * @bio: pending block IO operation
+ * @rw: low bit indicates a read (%0) or a write (%1)
+ *
+ * A request struct for queue @q has been allocated to handle the
+ * block IO operation @bio.
+ */
+DEFINE_EVENT(block_get_rq, block_getrq,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
+
+       TP_ARGS(q, bio, rw)
+);
+
+/**
+ * block_sleeprq - waiting to get a free request entry in queue for block IO operation
+ * @q: queue for operation
+ * @bio: pending block IO operation
+ * @rw: low bit indicates a read (%0) or a write (%1)
+ *
+ * In the case where a request struct cannot be provided for queue @q
+ * the process needs to wait for a request struct to become
+ * available.  This tracepoint event is generated each time the
+ * process goes to sleep waiting for a request struct to become
+ * available.
+ */
+DEFINE_EVENT(block_get_rq, block_sleeprq,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
+
+       TP_ARGS(q, bio, rw)
+);
+
+/**
+ * block_plug - keep operations requests in request queue
+ * @q: request queue to plug
+ *
+ * Plug the request queue @q.  Do not allow block operation requests
+ * to be sent to the device driver. Instead, accumulate requests in
+ * the queue to improve throughput performance of the block device.
+ */
+TRACE_EVENT(block_plug,
+
+       TP_PROTO(struct request_queue *q),
+
+       TP_ARGS(q),
+
+       TP_STRUCT__entry(
+               __array( char,          comm,   TASK_COMM_LEN   )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+       ),
+
+       TP_printk("[%s]", __entry->comm)
+);
+
+DECLARE_EVENT_CLASS(block_unplug,
+
+       TP_PROTO(struct request_queue *q, unsigned int depth, bool explicit),
+
+       TP_ARGS(q, depth, explicit),
+
+       TP_STRUCT__entry(
+               __field( int,           nr_rq                   )
+               __array( char,          comm,   TASK_COMM_LEN   )
+       ),
+
+       TP_fast_assign(
+               __entry->nr_rq = depth;
+               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+       ),
+
+       TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
+);
+
+/**
+ * block_unplug - release of operations requests in request queue
+ * @q: request queue to unplug
+ * @depth: number of requests just added to the queue
+ * @explicit: whether this was an explicit unplug, or one from schedule()
+ *
+ * Unplug request queue @q because device driver is scheduled to work
+ * on elements in the request queue.
+ */
+DEFINE_EVENT(block_unplug, block_unplug,
+
+       TP_PROTO(struct request_queue *q, unsigned int depth, bool explicit),
+
+       TP_ARGS(q, depth, explicit)
+);
+
+/**
+ * block_split - split a single bio struct into two bio structs
+ * @q: queue containing the bio
+ * @bio: block operation being split
+ * @new_sector: The starting sector for the new bio
+ *
+ * The bio request @bio in request queue @q needs to be split into two
+ * bio requests. The newly created @bio request starts at
+ * @new_sector. This split may be required due to hardware limitations
+ * such as an operation crossing device boundaries in a RAID system.
+ */
+TRACE_EVENT(block_split,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio,
+                unsigned int new_sector),
+
+       TP_ARGS(q, bio, new_sector),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev                             )
+               __field( sector_t,      sector                          )
+               __field( sector_t,      new_sector                      )
+               __array( char,          rwbs,           6               )
+               __array( char,          comm,           TASK_COMM_LEN   )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = bio->bi_bdev->bd_dev;
+               __entry->sector         = bio->bi_sector;
+               __entry->new_sector     = new_sector;
+               blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+       ),
+
+       TP_printk("%d,%d %s %llu / %llu [%s]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+                 (unsigned long long)__entry->sector,
+                 (unsigned long long)__entry->new_sector,
+                 __entry->comm)
+);
+
+/**
+ * block_bio_remap - map request for a logical device to the raw device
+ * @q: queue holding the operation
+ * @bio: revised operation
+ * @dev: device for the operation
+ * @from: original sector for the operation
+ *
+ * An operation for a logical device has been mapped to the
+ * raw block device.
+ */
+TRACE_EVENT(block_bio_remap,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
+                sector_t from),
+
+       TP_ARGS(q, bio, dev, from),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev             )
+               __field( sector_t,      sector          )
+               __field( unsigned int,  nr_sector       )
+               __field( dev_t,         old_dev         )
+               __field( sector_t,      old_sector      )
+               __array( char,          rwbs,   6       )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = bio->bi_bdev->bd_dev;
+               __entry->sector         = bio->bi_sector;
+               __entry->nr_sector      = bio->bi_size >> 9;
+               __entry->old_dev        = dev;
+               __entry->old_sector     = from;
+               blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+       ),
+
+       TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector,
+                 MAJOR(__entry->old_dev), MINOR(__entry->old_dev),
+                 (unsigned long long)__entry->old_sector)
+);
+
+/**
+ * block_rq_remap - map request for a block operation request
+ * @q: queue holding the operation
+ * @rq: block IO operation request
+ * @dev: device for the operation
+ * @from: original sector for the operation
+ *
+ * The block operation request @rq in @q has been remapped.  The block
+ * operation request @rq holds the current information and @from holds
+ * the original sector.
+ */
+TRACE_EVENT(block_rq_remap,
+
+       TP_PROTO(struct request_queue *q, struct request *rq, dev_t dev,
+                sector_t from),
+
+       TP_ARGS(q, rq, dev, from),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev             )
+               __field( sector_t,      sector          )
+               __field( unsigned int,  nr_sector       )
+               __field( dev_t,         old_dev         )
+               __field( sector_t,      old_sector      )
+               __array( char,          rwbs,   6       )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = disk_devt(rq->rq_disk);
+               __entry->sector         = blk_rq_pos(rq);
+               __entry->nr_sector      = blk_rq_sectors(rq);
+               __entry->old_dev        = dev;
+               __entry->old_sector     = from;
+               blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
+       ),
+
+       TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector,
+                 MAJOR(__entry->old_dev), MINOR(__entry->old_dev),
+                 (unsigned long long)__entry->old_sector)
+);
+
+#endif /* _TRACE_BLOCK_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+
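As an illustration of the TP_printk() formats above, a block_bio_remap event rendered in the trace output might look like the sketch below; the device and sector numbers are invented for the example and are not taken from this commit.

    /* Illustrative output only; all values are made up.
     *   8,0 R 1310720 + 8 <- (253,2) 2048
     * reads as: an 8-sector read now targeting device 8:0 (e.g. sda) at
     * sector 1310720, remapped from logical device 253:2 sector 2048. */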
diff --git a/instrumentation/events/mainline/irq.h b/instrumentation/events/mainline/irq.h
new file mode 100644 (file)
index 0000000..1c09820
--- /dev/null
@@ -0,0 +1,150 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM irq
+
+#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_IRQ_H
+
+#include <linux/tracepoint.h>
+
+struct irqaction;
+struct softirq_action;
+
+#define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq }
+#define show_softirq_name(val)                         \
+       __print_symbolic(val,                           \
+                        softirq_name(HI),              \
+                        softirq_name(TIMER),           \
+                        softirq_name(NET_TX),          \
+                        softirq_name(NET_RX),          \
+                        softirq_name(BLOCK),           \
+                        softirq_name(BLOCK_IOPOLL),    \
+                        softirq_name(TASKLET),         \
+                        softirq_name(SCHED),           \
+                        softirq_name(HRTIMER),         \
+                        softirq_name(RCU))
+
+/**
+ * irq_handler_entry - called immediately before the irq action handler
+ * @irq: irq number
+ * @action: pointer to struct irqaction
+ *
+ * The struct irqaction pointed to by @action contains various
+ * information about the handler, including the device name,
+ * @action->name, and the device id, @action->dev_id. When used in
+ * conjunction with the irq_handler_exit tracepoint, we can figure
+ * out irq handler latencies.
+ */
+TRACE_EVENT(irq_handler_entry,
+
+       TP_PROTO(int irq, struct irqaction *action),
+
+       TP_ARGS(irq, action),
+
+       TP_STRUCT__entry(
+               __field(        int,    irq             )
+               __string(       name,   action->name    )
+       ),
+
+       TP_fast_assign(
+               __entry->irq = irq;
+               __assign_str(name, action->name);
+       ),
+
+       TP_printk("irq=%d name=%s", __entry->irq, __get_str(name))
+);
+
+/**
+ * irq_handler_exit - called immediately after the irq action handler returns
+ * @irq: irq number
+ * @action: pointer to struct irqaction
+ * @ret: return value
+ *
+ * If the @ret value is set to IRQ_HANDLED, then we know that the corresponding
+ * @action->handler successfully handled this irq. Otherwise, the irq might be
+ * a shared irq line, or the irq was not handled successfully. Can be used in
+ * conjunction with the irq_handler_entry to understand irq handler latencies.
+ */
+TRACE_EVENT(irq_handler_exit,
+
+       TP_PROTO(int irq, struct irqaction *action, int ret),
+
+       TP_ARGS(irq, action, ret),
+
+       TP_STRUCT__entry(
+               __field(        int,    irq     )
+               __field(        int,    ret     )
+       ),
+
+       TP_fast_assign(
+               __entry->irq    = irq;
+               __entry->ret    = ret;
+       ),
+
+       TP_printk("irq=%d ret=%s",
+                 __entry->irq, __entry->ret ? "handled" : "unhandled")
+);
+
+DECLARE_EVENT_CLASS(softirq,
+
+       TP_PROTO(unsigned int vec_nr),
+
+       TP_ARGS(vec_nr),
+
+       TP_STRUCT__entry(
+               __field(        unsigned int,   vec     )
+       ),
+
+       TP_fast_assign(
+               __entry->vec = vec_nr;
+       ),
+
+       TP_printk("vec=%u [action=%s]", __entry->vec,
+                 show_softirq_name(__entry->vec))
+);
+
+/**
+ * softirq_entry - called immediately before the softirq handler
+ * @vec_nr:  softirq vector number
+ *
+ * When used in combination with the softirq_exit tracepoint
+ * we can determine the softirq handler runtime.
+ */
+DEFINE_EVENT(softirq, softirq_entry,
+
+       TP_PROTO(unsigned int vec_nr),
+
+       TP_ARGS(vec_nr)
+);
+
+/**
+ * softirq_exit - called immediately after the softirq handler returns
+ * @vec_nr:  softirq vector number
+ *
+ * When used in combination with the softirq_entry tracepoint
+ * we can determine the softirq handler runtime.
+ */
+DEFINE_EVENT(softirq, softirq_exit,
+
+       TP_PROTO(unsigned int vec_nr),
+
+       TP_ARGS(vec_nr)
+);
+
+/**
+ * softirq_raise - called immediately when a softirq is raised
+ * @vec_nr:  softirq vector number
+ *
+ * When used in combination with the softirq_entry tracepoint
+ * we can determine the softirq raise to run latency.
+ */
+DEFINE_EVENT(softirq, softirq_raise,
+
+       TP_PROTO(unsigned int vec_nr),
+
+       TP_ARGS(vec_nr)
+);
+
+#endif /*  _TRACE_IRQ_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
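The softirq_name()/show_softirq_name() pair above is a print-time lookup table: each entry pastes a softirq enum constant together with its short name. A rough sketch of one expanded entry and of the resulting output, assuming the softirq numbering of kernels of this era:

    /* softirq_name(BLOCK) expands, roughly, to: */
    { BLOCK_SOFTIRQ, "BLOCK" }
    /* so a softirq_entry event with vec=4 would print as
     * "vec=4 [action=BLOCK]", assuming BLOCK_SOFTIRQ == 4. */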
diff --git a/instrumentation/events/mainline/kvm.h b/instrumentation/events/mainline/kvm.h
new file mode 100644 (file)
index 0000000..46e3cd8
--- /dev/null
@@ -0,0 +1,312 @@
+#if !defined(_TRACE_KVM_MAIN_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_MAIN_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+
+#define ERSN(x) { KVM_EXIT_##x, "KVM_EXIT_" #x }
+
+#define kvm_trace_exit_reason                                          \
+       ERSN(UNKNOWN), ERSN(EXCEPTION), ERSN(IO), ERSN(HYPERCALL),      \
+       ERSN(DEBUG), ERSN(HLT), ERSN(MMIO), ERSN(IRQ_WINDOW_OPEN),      \
+       ERSN(SHUTDOWN), ERSN(FAIL_ENTRY), ERSN(INTR), ERSN(SET_TPR),    \
+       ERSN(TPR_ACCESS), ERSN(S390_SIEIC), ERSN(S390_RESET), ERSN(DCR),\
+       ERSN(NMI), ERSN(INTERNAL_ERROR), ERSN(OSI)
+
+TRACE_EVENT(kvm_userspace_exit,
+           TP_PROTO(__u32 reason, int errno),
+           TP_ARGS(reason, errno),
+
+       TP_STRUCT__entry(
+               __field(        __u32,          reason          )
+               __field(        int,            errno           )
+       ),
+
+       TP_fast_assign(
+               __entry->reason         = reason;
+               __entry->errno          = errno;
+       ),
+
+       TP_printk("reason %s (%d)",
+                 __entry->errno < 0 ?
+                 (__entry->errno == -EINTR ? "restart" : "error") :
+                 __print_symbolic(__entry->reason, kvm_trace_exit_reason),
+                 __entry->errno < 0 ? -__entry->errno : __entry->reason)
+);
+
+#if defined(__KVM_HAVE_IOAPIC)
+TRACE_EVENT(kvm_set_irq,
+       TP_PROTO(unsigned int gsi, int level, int irq_source_id),
+       TP_ARGS(gsi, level, irq_source_id),
+
+       TP_STRUCT__entry(
+               __field(        unsigned int,   gsi             )
+               __field(        int,            level           )
+               __field(        int,            irq_source_id   )
+       ),
+
+       TP_fast_assign(
+               __entry->gsi            = gsi;
+               __entry->level          = level;
+               __entry->irq_source_id  = irq_source_id;
+       ),
+
+       TP_printk("gsi %u level %d source %d",
+                 __entry->gsi, __entry->level, __entry->irq_source_id)
+);
+
+#define kvm_deliver_mode               \
+       {0x0, "Fixed"},                 \
+       {0x1, "LowPrio"},               \
+       {0x2, "SMI"},                   \
+       {0x3, "Res3"},                  \
+       {0x4, "NMI"},                   \
+       {0x5, "INIT"},                  \
+       {0x6, "SIPI"},                  \
+       {0x7, "ExtINT"}
+
+TRACE_EVENT(kvm_ioapic_set_irq,
+           TP_PROTO(__u64 e, int pin, bool coalesced),
+           TP_ARGS(e, pin, coalesced),
+
+       TP_STRUCT__entry(
+               __field(        __u64,          e               )
+               __field(        int,            pin             )
+               __field(        bool,           coalesced       )
+       ),
+
+       TP_fast_assign(
+               __entry->e              = e;
+               __entry->pin            = pin;
+               __entry->coalesced      = coalesced;
+       ),
+
+       TP_printk("pin %u dst %x vec=%u (%s|%s|%s%s)%s",
+                 __entry->pin, (u8)(__entry->e >> 56), (u8)__entry->e,
+                 __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode),
+                 (__entry->e & (1<<11)) ? "logical" : "physical",
+                 (__entry->e & (1<<15)) ? "level" : "edge",
+                 (__entry->e & (1<<16)) ? "|masked" : "",
+                 __entry->coalesced ? " (coalesced)" : "")
+);
+
+TRACE_EVENT(kvm_msi_set_irq,
+           TP_PROTO(__u64 address, __u64 data),
+           TP_ARGS(address, data),
+
+       TP_STRUCT__entry(
+               __field(        __u64,          address         )
+               __field(        __u64,          data            )
+       ),
+
+       TP_fast_assign(
+               __entry->address        = address;
+               __entry->data           = data;
+       ),
+
+       TP_printk("dst %u vec %x (%s|%s|%s%s)",
+                 (u8)(__entry->address >> 12), (u8)__entry->data,
+                 __print_symbolic((__entry->data >> 8 & 0x7), kvm_deliver_mode),
+                 (__entry->address & (1<<2)) ? "logical" : "physical",
+                 (__entry->data & (1<<15)) ? "level" : "edge",
+                 (__entry->address & (1<<3)) ? "|rh" : "")
+);
+
+#define kvm_irqchips                                           \
+       {KVM_IRQCHIP_PIC_MASTER,        "PIC master"},          \
+       {KVM_IRQCHIP_PIC_SLAVE,         "PIC slave"},           \
+       {KVM_IRQCHIP_IOAPIC,            "IOAPIC"}
+
+TRACE_EVENT(kvm_ack_irq,
+       TP_PROTO(unsigned int irqchip, unsigned int pin),
+       TP_ARGS(irqchip, pin),
+
+       TP_STRUCT__entry(
+               __field(        unsigned int,   irqchip         )
+               __field(        unsigned int,   pin             )
+       ),
+
+       TP_fast_assign(
+               __entry->irqchip        = irqchip;
+               __entry->pin            = pin;
+       ),
+
+       TP_printk("irqchip %s pin %u",
+                 __print_symbolic(__entry->irqchip, kvm_irqchips),
+                __entry->pin)
+);
+
+
+
+#endif /* defined(__KVM_HAVE_IOAPIC) */
+
+#define KVM_TRACE_MMIO_READ_UNSATISFIED 0
+#define KVM_TRACE_MMIO_READ 1
+#define KVM_TRACE_MMIO_WRITE 2
+
+#define kvm_trace_symbol_mmio \
+       { KVM_TRACE_MMIO_READ_UNSATISFIED, "unsatisfied-read" }, \
+       { KVM_TRACE_MMIO_READ, "read" }, \
+       { KVM_TRACE_MMIO_WRITE, "write" }
+
+TRACE_EVENT(kvm_mmio,
+       TP_PROTO(int type, int len, u64 gpa, u64 val),
+       TP_ARGS(type, len, gpa, val),
+
+       TP_STRUCT__entry(
+               __field(        u32,    type            )
+               __field(        u32,    len             )
+               __field(        u64,    gpa             )
+               __field(        u64,    val             )
+       ),
+
+       TP_fast_assign(
+               __entry->type           = type;
+               __entry->len            = len;
+               __entry->gpa            = gpa;
+               __entry->val            = val;
+       ),
+
+       TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx",
+                 __print_symbolic(__entry->type, kvm_trace_symbol_mmio),
+                 __entry->len, __entry->gpa, __entry->val)
+);
+
+#define kvm_fpu_load_symbol    \
+       {0, "unload"},          \
+       {1, "load"}
+
+TRACE_EVENT(kvm_fpu,
+       TP_PROTO(int load),
+       TP_ARGS(load),
+
+       TP_STRUCT__entry(
+               __field(        u32,            load            )
+       ),
+
+       TP_fast_assign(
+               __entry->load           = load;
+       ),
+
+       TP_printk("%s", __print_symbolic(__entry->load, kvm_fpu_load_symbol))
+);
+
+TRACE_EVENT(kvm_age_page,
+       TP_PROTO(ulong hva, struct kvm_memory_slot *slot, int ref),
+       TP_ARGS(hva, slot, ref),
+
+       TP_STRUCT__entry(
+               __field(        u64,    hva             )
+               __field(        u64,    gfn             )
+               __field(        u8,     referenced      )
+       ),
+
+       TP_fast_assign(
+               __entry->hva            = hva;
+               __entry->gfn            =
+                 slot->base_gfn + ((hva - slot->userspace_addr) >> PAGE_SHIFT);
+               __entry->referenced     = ref;
+       ),
+
+       TP_printk("hva %llx gfn %llx %s",
+                 __entry->hva, __entry->gfn,
+                 __entry->referenced ? "YOUNG" : "OLD")
+);
+
+#ifdef CONFIG_KVM_ASYNC_PF
+DECLARE_EVENT_CLASS(kvm_async_get_page_class,
+
+       TP_PROTO(u64 gva, u64 gfn),
+
+       TP_ARGS(gva, gfn),
+
+       TP_STRUCT__entry(
+               __field(__u64, gva)
+               __field(u64, gfn)
+       ),
+
+       TP_fast_assign(
+               __entry->gva = gva;
+               __entry->gfn = gfn;
+       ),
+
+       TP_printk("gva = %#llx, gfn = %#llx", __entry->gva, __entry->gfn)
+);
+
+DEFINE_EVENT(kvm_async_get_page_class, kvm_try_async_get_page,
+
+       TP_PROTO(u64 gva, u64 gfn),
+
+       TP_ARGS(gva, gfn)
+);
+
+DEFINE_EVENT(kvm_async_get_page_class, kvm_async_pf_doublefault,
+
+       TP_PROTO(u64 gva, u64 gfn),
+
+       TP_ARGS(gva, gfn)
+);
+
+DECLARE_EVENT_CLASS(kvm_async_pf_nopresent_ready,
+
+       TP_PROTO(u64 token, u64 gva),
+
+       TP_ARGS(token, gva),
+
+       TP_STRUCT__entry(
+               __field(__u64, token)
+               __field(__u64, gva)
+       ),
+
+       TP_fast_assign(
+               __entry->token = token;
+               __entry->gva = gva;
+       ),
+
+       TP_printk("token %#llx gva %#llx", __entry->token, __entry->gva)
+
+);
+
+DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_not_present,
+
+       TP_PROTO(u64 token, u64 gva),
+
+       TP_ARGS(token, gva)
+);
+
+DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_ready,
+
+       TP_PROTO(u64 token, u64 gva),
+
+       TP_ARGS(token, gva)
+);
+
+TRACE_EVENT(
+       kvm_async_pf_completed,
+       TP_PROTO(unsigned long address, struct page *page, u64 gva),
+       TP_ARGS(address, page, gva),
+
+       TP_STRUCT__entry(
+               __field(unsigned long, address)
+               __field(pfn_t, pfn)
+               __field(u64, gva)
+               ),
+
+       TP_fast_assign(
+               __entry->address = address;
+               __entry->pfn = page ? page_to_pfn(page) : 0;
+               __entry->gva = gva;
+               ),
+
+       TP_printk("gva %#llx address %#lx pfn %#llx",  __entry->gva,
+                 __entry->address, __entry->pfn)
+);
+
+#endif
+
+#endif /* _TRACE_KVM_MAIN_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
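ERSN() plays the same role for KVM exit reasons: it pastes the KVM_EXIT_* constant together with its name so __print_symbolic() can translate the numeric reason in kvm_userspace_exit. A hedged sketch of one entry and its effect, with the constant value assumed rather than taken from this commit:

    /* ERSN(IO) expands, roughly, to: */
    { KVM_EXIT_IO, "KVM_EXIT_IO" }
    /* so an exit with reason == KVM_EXIT_IO and errno == 0 would print as
     * "reason KVM_EXIT_IO (2)", assuming KVM_EXIT_IO == 2. */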
diff --git a/instrumentation/events/mainline/sched.h b/instrumentation/events/mainline/sched.h
new file mode 100644 (file)
index 0000000..f633478
--- /dev/null
@@ -0,0 +1,397 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM sched
+
+#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SCHED_H
+
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+/*
+ * Tracepoint for calling kthread_stop, performed to end a kthread:
+ */
+TRACE_EVENT(sched_kthread_stop,
+
+       TP_PROTO(struct task_struct *t),
+
+       TP_ARGS(t),
+
+       TP_STRUCT__entry(
+               __array(        char,   comm,   TASK_COMM_LEN   )
+               __field(        pid_t,  pid                     )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
+               __entry->pid    = t->pid;
+       ),
+
+       TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
+);
+
+/*
+ * Tracepoint for the return value of the kthread stopping:
+ */
+TRACE_EVENT(sched_kthread_stop_ret,
+
+       TP_PROTO(int ret),
+
+       TP_ARGS(ret),
+
+       TP_STRUCT__entry(
+               __field(        int,    ret     )
+       ),
+
+       TP_fast_assign(
+               __entry->ret    = ret;
+       ),
+
+       TP_printk("ret=%d", __entry->ret)
+);
+
+/*
+ * Tracepoint for waking up a task:
+ */
+DECLARE_EVENT_CLASS(sched_wakeup_template,
+
+       TP_PROTO(struct task_struct *p, int success),
+
+       TP_ARGS(p, success),
+
+       TP_STRUCT__entry(
+               __array(        char,   comm,   TASK_COMM_LEN   )
+               __field(        pid_t,  pid                     )
+               __field(        int,    prio                    )
+               __field(        int,    success                 )
+               __field(        int,    target_cpu              )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+               __entry->pid            = p->pid;
+               __entry->prio           = p->prio;
+               __entry->success        = success;
+               __entry->target_cpu     = task_cpu(p);
+       ),
+
+       TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d",
+                 __entry->comm, __entry->pid, __entry->prio,
+                 __entry->success, __entry->target_cpu)
+);
+
+DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
+            TP_PROTO(struct task_struct *p, int success),
+            TP_ARGS(p, success));
+
+/*
+ * Tracepoint for waking up a new task:
+ */
+DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
+            TP_PROTO(struct task_struct *p, int success),
+            TP_ARGS(p, success));
+
+#ifdef CREATE_TRACE_POINTS
+static inline long __trace_sched_switch_state(struct task_struct *p)
+{
+       long state = p->state;
+
+#ifdef CONFIG_PREEMPT
+       /*
+        * For all intents and purposes a preempted task is a running task.
+        */
+       if (task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)
+               state = TASK_RUNNING;
+#endif
+
+       return state;
+}
+#endif
+
+/*
+ * Tracepoint for task switches, performed by the scheduler:
+ */
+TRACE_EVENT(sched_switch,
+
+       TP_PROTO(struct task_struct *prev,
+                struct task_struct *next),
+
+       TP_ARGS(prev, next),
+
+       TP_STRUCT__entry(
+               __array(        char,   prev_comm,      TASK_COMM_LEN   )
+               __field(        pid_t,  prev_pid                        )
+               __field(        int,    prev_prio                       )
+               __field(        long,   prev_state                      )
+               __array(        char,   next_comm,      TASK_COMM_LEN   )
+               __field(        pid_t,  next_pid                        )
+               __field(        int,    next_prio                       )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
+               __entry->prev_pid       = prev->pid;
+               __entry->prev_prio      = prev->prio;
+               __entry->prev_state     = __trace_sched_switch_state(prev);
+               memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
+               __entry->next_pid       = next->pid;
+               __entry->next_prio      = next->prio;
+       ),
+
+       TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s ==> next_comm=%s next_pid=%d next_prio=%d",
+               __entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
+               __entry->prev_state ?
+                 __print_flags(__entry->prev_state, "|",
+                               { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
+                               { 16, "Z" }, { 32, "X" }, { 64, "x" },
+                               { 128, "W" }) : "R",
+               __entry->next_comm, __entry->next_pid, __entry->next_prio)
+);
+
+/*
+ * Tracepoint for a task being migrated:
+ */
+TRACE_EVENT(sched_migrate_task,
+
+       TP_PROTO(struct task_struct *p, int dest_cpu),
+
+       TP_ARGS(p, dest_cpu),
+
+       TP_STRUCT__entry(
+               __array(        char,   comm,   TASK_COMM_LEN   )
+               __field(        pid_t,  pid                     )
+               __field(        int,    prio                    )
+               __field(        int,    orig_cpu                )
+               __field(        int,    dest_cpu                )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+               __entry->pid            = p->pid;
+               __entry->prio           = p->prio;
+               __entry->orig_cpu       = task_cpu(p);
+               __entry->dest_cpu       = dest_cpu;
+       ),
+
+       TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d",
+                 __entry->comm, __entry->pid, __entry->prio,
+                 __entry->orig_cpu, __entry->dest_cpu)
+);
+
+DECLARE_EVENT_CLASS(sched_process_template,
+
+       TP_PROTO(struct task_struct *p),
+
+       TP_ARGS(p),
+
+       TP_STRUCT__entry(
+               __array(        char,   comm,   TASK_COMM_LEN   )
+               __field(        pid_t,  pid                     )
+               __field(        int,    prio                    )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+               __entry->pid            = p->pid;
+               __entry->prio           = p->prio;
+       ),
+
+       TP_printk("comm=%s pid=%d prio=%d",
+                 __entry->comm, __entry->pid, __entry->prio)
+);
+
+/*
+ * Tracepoint for freeing a task:
+ */
+DEFINE_EVENT(sched_process_template, sched_process_free,
+            TP_PROTO(struct task_struct *p),
+            TP_ARGS(p));
+
+/*
+ * Tracepoint for a task exiting:
+ */
+DEFINE_EVENT(sched_process_template, sched_process_exit,
+            TP_PROTO(struct task_struct *p),
+            TP_ARGS(p));
+
+/*
+ * Tracepoint for waiting on task to unschedule:
+ */
+DEFINE_EVENT(sched_process_template, sched_wait_task,
+       TP_PROTO(struct task_struct *p),
+       TP_ARGS(p));
+
+/*
+ * Tracepoint for a waiting task:
+ */
+TRACE_EVENT(sched_process_wait,
+
+       TP_PROTO(struct pid *pid),
+
+       TP_ARGS(pid),
+
+       TP_STRUCT__entry(
+               __array(        char,   comm,   TASK_COMM_LEN   )
+               __field(        pid_t,  pid                     )
+               __field(        int,    prio                    )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+               __entry->pid            = pid_nr(pid);
+               __entry->prio           = current->prio;
+       ),
+
+       TP_printk("comm=%s pid=%d prio=%d",
+                 __entry->comm, __entry->pid, __entry->prio)
+);
+
+/*
+ * Tracepoint for do_fork:
+ */
+TRACE_EVENT(sched_process_fork,
+
+       TP_PROTO(struct task_struct *parent, struct task_struct *child),
+
+       TP_ARGS(parent, child),
+
+       TP_STRUCT__entry(
+               __array(        char,   parent_comm,    TASK_COMM_LEN   )
+               __field(        pid_t,  parent_pid                      )
+               __array(        char,   child_comm,     TASK_COMM_LEN   )
+               __field(        pid_t,  child_pid                       )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN);
+               __entry->parent_pid     = parent->pid;
+               memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN);
+               __entry->child_pid      = child->pid;
+       ),
+
+       TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d",
+               __entry->parent_comm, __entry->parent_pid,
+               __entry->child_comm, __entry->child_pid)
+);
+
+/*
+ * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
+ *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
+ */
+DECLARE_EVENT_CLASS(sched_stat_template,
+
+       TP_PROTO(struct task_struct *tsk, u64 delay),
+
+       TP_ARGS(tsk, delay),
+
+       TP_STRUCT__entry(
+               __array( char,  comm,   TASK_COMM_LEN   )
+               __field( pid_t, pid                     )
+               __field( u64,   delay                   )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+               __entry->pid    = tsk->pid;
+               __entry->delay  = delay;
+       )
+       TP_perf_assign(
+               __perf_count(delay);
+       ),
+
+       TP_printk("comm=%s pid=%d delay=%Lu [ns]",
+                       __entry->comm, __entry->pid,
+                       (unsigned long long)__entry->delay)
+);
+
+
+/*
+ * Tracepoint for accounting wait time (time the task is runnable
+ * but not actually running due to scheduler contention).
+ */
+DEFINE_EVENT(sched_stat_template, sched_stat_wait,
+            TP_PROTO(struct task_struct *tsk, u64 delay),
+            TP_ARGS(tsk, delay));
+
+/*
+ * Tracepoint for accounting sleep time (time the task is not runnable,
+ * including iowait, see below).
+ */
+DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
+            TP_PROTO(struct task_struct *tsk, u64 delay),
+            TP_ARGS(tsk, delay));
+
+/*
+ * Tracepoint for accounting iowait time (time the task is not runnable
+ * due to waiting on IO to complete).
+ */
+DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
+            TP_PROTO(struct task_struct *tsk, u64 delay),
+            TP_ARGS(tsk, delay));
+
+/*
+ * Tracepoint for accounting runtime (time the task is executing
+ * on a CPU).
+ */
+TRACE_EVENT(sched_stat_runtime,
+
+       TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
+
+       TP_ARGS(tsk, runtime, vruntime),
+
+       TP_STRUCT__entry(
+               __array( char,  comm,   TASK_COMM_LEN   )
+               __field( pid_t, pid                     )
+               __field( u64,   runtime                 )
+               __field( u64,   vruntime                        )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+               __entry->pid            = tsk->pid;
+               __entry->runtime        = runtime;
+               __entry->vruntime       = vruntime;
+       )
+       TP_perf_assign(
+               __perf_count(runtime);
+       ),
+
+       TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]",
+                       __entry->comm, __entry->pid,
+                       (unsigned long long)__entry->runtime,
+                       (unsigned long long)__entry->vruntime)
+);
+
+/*
+ * Tracepoint for showing priority inheritance modifying a task's
+ * priority.
+ */
+TRACE_EVENT(sched_pi_setprio,
+
+       TP_PROTO(struct task_struct *tsk, int newprio),
+
+       TP_ARGS(tsk, newprio),
+
+       TP_STRUCT__entry(
+               __array( char,  comm,   TASK_COMM_LEN   )
+               __field( pid_t, pid                     )
+               __field( int,   oldprio                 )
+               __field( int,   newprio                 )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+               __entry->pid            = tsk->pid;
+               __entry->oldprio        = tsk->prio;
+               __entry->newprio        = newprio;
+       ),
+
+       TP_printk("comm=%s pid=%d oldprio=%d newprio=%d",
+                       __entry->comm, __entry->pid,
+                       __entry->oldprio, __entry->newprio)
+);
+
+#endif /* _TRACE_SCHED_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
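The hard-coded flag values in the sched_switch TP_printk() above mirror the task state bits of this kernel generation; nothing in the header names them, so the assumed mapping is sketched here for reference only:

    /* Assumed task state bits behind the prev_state flags table:
     *    1  TASK_INTERRUPTIBLE   -> "S"
     *    2  TASK_UNINTERRUPTIBLE -> "D"
     *    4  __TASK_STOPPED       -> "T"
     *    8  __TASK_TRACED        -> "t"
     *   16  EXIT_ZOMBIE          -> "Z"
     *   32  EXIT_DEAD            -> "X"
     *   64  TASK_DEAD            -> "x"
     *  128  TASK_WAKEKILL        -> "W"
     * prev_state == 0 is printed as "R" (running or runnable). */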
diff --git a/probes/define_trace.h b/probes/define_trace.h
new file mode 100644 (file)
index 0000000..2224fd5
--- /dev/null
@@ -0,0 +1,123 @@
+/*
+ * Trace files that want to automate creation of all tracepoints defined
+ * in their file should include this file. The following are macros that the
+ * trace file may define:
+ *
+ * TRACE_SYSTEM defines the system the tracepoint is for
+ *
+ * TRACE_INCLUDE_FILE if the file name is something other than TRACE_SYSTEM.h
+ *     This macro may be defined to tell define_trace.h what file to include.
+ *     Note, leave off the ".h".
+ *
+ * TRACE_INCLUDE_PATH if the path is something other than core kernel include/trace
+ *     then this macro can define the path to use. Note, the path is relative to
+ *     define_trace.h, not the file including it. Full path names for out of tree
+ *     modules must be used.
+ */
+
+#ifdef CREATE_TRACE_POINTS
+
+/* Prevent recursion */
+#undef CREATE_TRACE_POINTS
+
+#include <linux/stringify.h>
+/*
+ * module.h includes tracepoints, and because ftrace.h
+ * pulls in module.h:
+ *  trace/ftrace.h -> linux/ftrace_event.h -> linux/perf_event.h ->
+ *  linux/ftrace.h -> linux/module.h
+ * we must include module.h here before we play with any of
+ * the TRACE_EVENT() macros, otherwise the tracepoints included
+ * by module.h may break the build.
+ */
+#include <linux/module.h>
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
+       DEFINE_TRACE(name)
+
+#undef TRACE_EVENT_CONDITION
+#define TRACE_EVENT_CONDITION(name, proto, args, cond, tstruct, assign, print) \
+       TRACE_EVENT(name,                                               \
+               PARAMS(proto),                                          \
+               PARAMS(args),                                           \
+               PARAMS(tstruct),                                        \
+               PARAMS(assign),                                         \
+               PARAMS(print))
+
+#undef TRACE_EVENT_FN
+#define TRACE_EVENT_FN(name, proto, args, tstruct,             \
+               assign, print, reg, unreg)                      \
+       DEFINE_TRACE_FN(name, reg, unreg)
+
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, name, proto, args) \
+       DEFINE_TRACE(name)
+
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
+       DEFINE_TRACE(name)
+
+#undef DEFINE_EVENT_CONDITION
+#define DEFINE_EVENT_CONDITION(template, name, proto, args, cond) \
+       DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
+#undef DECLARE_TRACE
+#define DECLARE_TRACE(name, proto, args)       \
+       DEFINE_TRACE(name)
+
+#undef TRACE_INCLUDE
+#undef __TRACE_INCLUDE
+
+#ifndef TRACE_INCLUDE_FILE
+# define TRACE_INCLUDE_FILE TRACE_SYSTEM
+# define UNDEF_TRACE_INCLUDE_FILE
+#endif
+
+#ifndef TRACE_INCLUDE_PATH
+# define __TRACE_INCLUDE(system) <trace/events/system.h>
+# define UNDEF_TRACE_INCLUDE_PATH
+#else
+# define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h)
+#endif
+
+# define TRACE_INCLUDE(system) __TRACE_INCLUDE(system)
+
+/* Let the trace headers be reread */
+#define TRACE_HEADER_MULTI_READ
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+/* Make all open coded DECLARE_TRACE nops */
+#undef DECLARE_TRACE
+#define DECLARE_TRACE(name, proto, args)
+
+#ifdef LTTNG_PACKAGE_BUILD
+#include "lttng-events.h"
+#endif
+
+#undef TRACE_EVENT
+#undef TRACE_EVENT_FN
+#undef TRACE_EVENT_CONDITION
+#undef DECLARE_EVENT_CLASS
+#undef DEFINE_EVENT
+#undef DEFINE_EVENT_PRINT
+#undef DEFINE_EVENT_CONDITION
+#undef TRACE_HEADER_MULTI_READ
+#undef DECLARE_TRACE
+
+/* Only undef what we defined in this file */
+#ifdef UNDEF_TRACE_INCLUDE_FILE
+# undef TRACE_INCLUDE_FILE
+# undef UNDEF_TRACE_INCLUDE_FILE
+#endif
+
+#ifdef UNDEF_TRACE_INCLUDE_PATH
+# undef TRACE_INCLUDE_PATH
+# undef UNDEF_TRACE_INCLUDE_PATH
+#endif
+
+/* We may be processing more files */
+#define CREATE_TRACE_POINTS
+
+#endif /* CREATE_TRACE_POINTS */
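For a trace header whose file name differs from TRACE_SYSTEM, the macros documented at the top of this file would be set up roughly as below. This is a hypothetical out-of-tree header; all names are invented to illustrate the contract, and TRACE_INCLUDE_PATH must resolve relative to define_trace.h:

    /* foo_trace.h -- hypothetical example, not part of this commit */
    #undef TRACE_SYSTEM
    #define TRACE_SYSTEM foo

    #if !defined(_TRACE_FOO_H) || defined(TRACE_HEADER_MULTI_READ)
    #define _TRACE_FOO_H
    #include <linux/tracepoint.h>
    /* ... TRACE_EVENT() definitions for the foo subsystem ... */
    #endif /* _TRACE_FOO_H */

    /* This part must be outside protection */
    #undef TRACE_INCLUDE_PATH
    #define TRACE_INCLUDE_PATH /full/path/to/the/module/sources
    #undef TRACE_INCLUDE_FILE
    #define TRACE_INCLUDE_FILE foo_trace
    #include <trace/define_trace.h>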
index f031d450e3f29ca4009d576bd4b8bda9068571b7..dacce7bd287f0413da50861ac71de3a396e8d951 100644 (file)
@@ -53,6 +53,28 @@ struct lttng_event_desc {
        TRACE_EVENT(name, PARAMS(proto), PARAMS(args),                  \
                PARAMS(tstruct), PARAMS(assign), PARAMS(print))         \
 
+/*
+ * Stage 0.1 of the trace events.
+ *
+ * Create dummy trace calls for each event, verifying that the LTTng module
+ * TRACE_EVENT headers match the kernel arguments. Will be optimized out by the
+ * compiler.
+ */
+
+#include "lttng-events-reset.h"        /* Reset all macros within TRACE_EVENT */
+
+#undef TP_PROTO
+#define TP_PROTO(args...) args
+
+#undef TP_ARGS
+#define TP_ARGS(args...) args
+
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(_template, _name, _proto, _args)                  \
+void trace_##_name(_proto);
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
 /*
  * Stage 1 of the trace events.
  *
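For a single event this stage therefore emits nothing but a prototype; because the kernel header included by the probe already declared the same trace_*() function, any drift in the argument list shows up as a conflicting-declaration error at compile time. A hedged sketch of one expansion, using block_bio_queue as the example:

    /* Roughly what Stage 0.1 emits for one DEFINE_EVENT; it is never
     * called and costs nothing at run time: */
    void trace_block_bio_queue(struct request_queue *q, struct bio *bio);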
index 7bd5eaf08f6a4f0bd676017f1fc89ac61ef27084..43dd2be1c6b279d0e27440a836ef828a69445ceb 100644 (file)
@@ -9,14 +9,20 @@
 #include <linux/module.h>
 #include <linux/blktrace_api.h>
 
+/*
+ * Create the tracepoint static inlines from the kernel to validate that our
+ * trace event macros match the kernel we run on.
+ */
+#include <trace/events/block.h>
+
 /*
  * Create LTTng tracepoint probes.
  */
 #define LTTNG_PACKAGE_BUILD
 #define CREATE_TRACE_POINTS
-#undef CONFIG_EVENT_TRACING    /* Don't create Ftrace probes */
+#define TRACE_INCLUDE_PATH ../instrumentation/events/lttng-module
 
-#include <trace/events/block.h>
+#include "../instrumentation/events/lttng-module/block.h"
 
 MODULE_LICENSE("GPL and additional rights");
 MODULE_AUTHOR("Mathieu Desnoyers <mathieu.desnoyers@efficios.com>");
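The irq, kvm and sched probes below follow the same recipe, so the flow is sketched once here (simplified; the real content lives in the included headers, not in the probe file itself): the kernel header is included first so its static inline trace_*() declarations exist, then LTTNG_PACKAGE_BUILD, CREATE_TRACE_POINTS and TRACE_INCLUDE_PATH redirect the trailing #include <trace/define_trace.h> of the local lttng-module header into lttng-events.h, which generates the LTTng probes:

    /* Sketch of the effective ordering in lttng-probe-block.c: */
    #include <trace/events/block.h>   /* kernel trace_*() static inlines      */
    #define LTTNG_PACKAGE_BUILD       /* let define_trace.h pull in           */
    #define CREATE_TRACE_POINTS       /* lttng-events.h on the second pass    */
    #define TRACE_INCLUDE_PATH ../instrumentation/events/lttng-module
    #include "../instrumentation/events/lttng-module/block.h"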
index bf0ad12aedb951d3a49765e6caace40de641cf50..fd4213f053852cf2b6c6f8fe095ccf288cd027bc 100644 (file)
@@ -9,14 +9,20 @@
 #include <linux/module.h>
 #include <linux/interrupt.h>
 
+/*
+ * Create the tracepoint static inlines from the kernel to validate that our
+ * trace event macros match the kernel we run on.
+ */
+#include <trace/events/irq.h>
+
 /*
  * Create LTTng tracepoint probes.
  */
 #define LTTNG_PACKAGE_BUILD
 #define CREATE_TRACE_POINTS
-#undef CONFIG_EVENT_TRACING    /* Don't create Ftrace probes */
+#define TRACE_INCLUDE_PATH ../instrumentation/events/lttng-module
 
-#include <trace/events/irq.h>
+#include "../instrumentation/events/lttng-module/irq.h"
 
 MODULE_LICENSE("GPL and additional rights");
 MODULE_AUTHOR("Mathieu Desnoyers <mathieu.desnoyers@efficios.com>");
index b70b058bdecf9f123b801bd5f772738104c4e01f..05988e556c9cb4caf6833a49544e00cd0351f06a 100644 (file)
@@ -7,16 +7,22 @@
  */
 
 #include <linux/module.h>
+#include <linux/kvm_host.h>
+
+/*
+ * Create the tracepoint static inlines from the kernel to validate that our
+ * trace event macros match the kernel we run on.
+ */
+#include <trace/events/kvm.h>
 
 /*
  * Create LTTng tracepoint probes.
  */
 #define LTTNG_PACKAGE_BUILD
 #define CREATE_TRACE_POINTS
-#undef CONFIG_EVENT_TRACING    /* Don't create Ftrace probes */
+#define TRACE_INCLUDE_PATH ../instrumentation/events/lttng-module
 
-#include <linux/kvm_host.h>
-#include <trace/events/kvm.h>
+#include "../instrumentation/events/lttng-module/kvm.h"
 
 MODULE_LICENSE("GPL and additional rights");
 MODULE_AUTHOR("Mathieu Desnoyers <mathieu.desnoyers@efficios.com>");
index 99aadc46c21681bd9ca31f48dd8ee634e9b580d3..8f9f24702833d860fbd08f466449399d633d2651 100644 (file)
@@ -8,14 +8,20 @@
 
 #include <linux/module.h>
 
+/*
+ * Create the tracepoint static inlines from the kernel to validate that our
+ * trace event macros match the kernel we run on.
+ */
+#include <trace/events/sched.h>
+
 /*
  * Create LTTng tracepoint probes.
  */
 #define LTTNG_PACKAGE_BUILD
 #define CREATE_TRACE_POINTS
-#undef CONFIG_EVENT_TRACING    /* Don't create Ftrace probes */
+#define TRACE_INCLUDE_PATH ../instrumentation/events/lttng-module
 
-#include <trace/events/sched.h>
+#include "../instrumentation/events/lttng-module/sched.h"
 
 MODULE_LICENSE("GPL and additional rights");
 MODULE_AUTHOR("Mathieu Desnoyers <mathieu.desnoyers@efficios.com>");