From 82fa766de8ddb7e5ed9ac9f595bc2f97ee7f3d38 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 17 Nov 2011 10:52:50 -0500 Subject: [PATCH] Tracepoint: evaluate arguments within test block > On Thu, 2011-11-17 at 04:55 +0100, Eric Dumazet wrote: > > The general admitted claim of a tracepoint being on x86 a single > > instruction : > > > > jmp +0 > > > > Is not always true. > > > > For example in mm/slub.c, kmem_cache_alloc() > > > > void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_); > > trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags); > > return ret; > > > > We can check compiler output and see that 4 extra instructions were > > added because s->objsize & s->size are evaluated. > > > > I noticed this in a perf session, because these 4 extra instructions > > added some noticeable latency/cost. > > > > c10e26a4: 8b 5d d8 mov -0x28(%ebp),%ebx > > c10e26a7: 85 db test %ebx,%ebx > > c10e26a9: 75 6d jne c10e2718 (doing the memset()) > > c10e26ab: 8b 76 0c mov 0xc(%esi),%esi // extra 1 > > c10e26ae: 8b 5d 04 mov 0x4(%ebp),%ebx // extra 2 > > c10e26b1: 89 75 f0 mov %esi,-0x10(%ebp) // extra 3 > > c10e26b4: 89 5d ec mov %ebx,-0x14(%ebp) // extra 4 > > c10e26b7: e9 00 00 00 00 jmp c10e26bc > > c10e26bc: 8b 45 d8 mov -0x28(%ebp),%eax > > c10e26bf: 83 c4 28 add $0x28,%esp > > c10e26c2: 5b pop %ebx > > c10e26c3: 5e pop %esi > > c10e26c4: 5f pop %edi > > c10e26c5: c9 leave > > > > > > A fix would be to not declare an inline function but a macro... > > > > #define trace_kmem_cache_alloc(...) \ > > if (static_branch(&__tracepoint_kmem_cache_alloc.key)) \ > > __DO_TRACE(&__tracepoint_kmem_cache_alloc, \ > > ... > > > > Anyone has some clever idea how to make this possible ? Do this by moving the test into the tracepoint() macro. Reported-by: Eric Dumazet Signed-off-by: Mathieu Desnoyers --- include/lttng/tracepoint.h | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/include/lttng/tracepoint.h b/include/lttng/tracepoint.h index 2ca45989..8e3b6044 100644 --- a/include/lttng/tracepoint.h +++ b/include/lttng/tracepoint.h @@ -35,8 +35,11 @@ extern "C" { * Tracepoints should be added to the instrumented code using the * "tracepoint()" macro. */ -#define tracepoint(provider, name, args...) \ - __trace_##provider##___##name(args) +#define tracepoint(provider, name, args...) \ + do { \ + if (caa_unlikely(__tracepoint_##provider##___##name.state)) \ + __trace_##provider##___##name(args); \ + } while (0) /* * it_func[0] is never NULL because there is at least one element in the array @@ -148,13 +151,6 @@ extern "C" { #define _TP_ARGS_PROTO_DATA(...) _TP_PROTO_DATA_N(_TP_NARGS(0, ##__VA_ARGS__), ##__VA_ARGS__) #define _TP_ARGS_VARS_DATA(...) _TP_VARS_DATA_N(_TP_NARGS(0, ##__VA_ARGS__), ##__VA_ARGS__) -#define __CHECK_TRACE(provider, name, proto, args) \ - do { \ - if (caa_unlikely(__tracepoint_##provider##___##name.state)) \ - __DO_TRACE(&__tracepoint_##provider##___##name, \ - TP_PARAMS(proto), TP_PARAMS(args)); \ - } while (0) - /* * Make sure the alignment of the structure in the __tracepoints section will * not add unwanted padding between the beginning of the section and the @@ -164,8 +160,8 @@ extern "C" { extern struct tracepoint __tracepoint_##provider##___##name; \ static inline void __trace_##provider##___##name(proto) \ { \ - __CHECK_TRACE(provider, name, TP_PARAMS(data_proto), \ - TP_PARAMS(data_args)); \ + __DO_TRACE(&__tracepoint_##provider##___##name, \ + TP_PARAMS(data_proto), TP_PARAMS(data_args)); \ } \ static inline int \ __register_trace_##provider##___##name(void (*probe)(data_proto), void *data) \ -- 2.34.1