X-Git-Url: http://git.lttng.org/?a=blobdiff_plain;f=lttng-context-callstack.c;h=432fadb652f349566ae1eee7e646ed29f83b507b;hb=e0407e483deb67b6f8617d7100278c97313a9914;hp=3b7859eb8542b8c6bc073288f2380cf3b29e37fb;hpb=64cc198b453a1fd05fb6f23bda08282ed0902c1c;p=lttng-modules.git

diff --git a/lttng-context-callstack.c b/lttng-context-callstack.c
index 3b7859eb..432fadb6 100644
--- a/lttng-context-callstack.c
+++ b/lttng-context-callstack.c
@@ -20,31 +20,32 @@
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
- * The callstack context can be added to any kernel
- * event. It records either the kernel or the userspace callstack, up to a
- * max depth. The context is a CTF sequence, such that it uses only the space
- * required for the number of callstack entries.
+ * The callstack context can be added to any kernel event. It records
+ * either the kernel or the userspace callstack, up to a maximum depth.
+ * The context is a CTF sequence, so it uses only the space required
+ * for the number of callstack entries actually recorded.
  *
- * It allocates callstack buffers per-CPU up to 4 interrupt nesting. This
- * nesting limit is the same as defined in the ring buffer. It therefore uses a
- * fixed amount of memory, proportional to the number of CPUs:
+ * It allocates per-CPU callstack buffers for up to 4 levels of
+ * interrupt nesting. This limit is the same as the one defined in the
+ * ring buffer. It therefore uses a fixed amount of memory, proportional
+ * to the number of CPUs:
  *
  * size = cpus * nest * depth * sizeof(unsigned long)
  *
- * Which is about 800 bytes per-CPUs on 64-bit host and a depth of 25. The
- * allocation is done at the initialization to avoid memory allocation
- * overhead while tracing, using a shallow stack.
+ * This is 4096 bytes per CPU on a 64-bit host with a depth of 128.
+ * The allocation is done at initialization to avoid memory
+ * allocation overhead while tracing, using a shallow stack.
  *
  * The kernel callstack is recovered using save_stack_trace(), and the
  * userspace callstack uses save_stack_trace_user(). They rely on frame
- * pointers. These are usually available for the kernel, but the compiler
- * option -fomit-frame-pointer frequently used in popular Linux distributions
- * may cause the userspace callstack to be unreliable, and is a known
- * limitation of this approach. If frame pointers are not available, it
- * produces no error, but the callstack will be empty. We still provide the
- * feature, because it works well for runtime environments having frame
- * pointers. In the future, unwind support and/or last branch record may
- * provide a solution to this problem.
+ * pointers. These are usually available for the kernel, but the
+ * compiler option -fomit-frame-pointer, frequently used in popular
+ * Linux distributions, may make the userspace callstack unreliable,
+ * which is a known limitation of this approach. If frame pointers are
+ * not available, no error is produced, but the callstack will be
+ * empty. We still provide the feature, because it works well for
+ * runtime environments that have frame pointers. In the future, unwind
+ * support and/or last branch record may provide a solution.
  *
  * The symbol name resolution is left to the trace reader.
  */
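As a quick check of the sizing formula in the hunk above, here is a minimal stand-alone sketch (a hypothetical user-space program, not part of the patch) evaluating it under two stated assumptions: RING_BUFFER_MAX_NESTING is 4, as the comment says, and the host is LP64 so sizeof(unsigned long) is 8:

#include <stdio.h>

/* Mirrored from the patch: ring buffer nesting limit and the new
 * maximum callstack depth. Both values are assumptions restated here. */
#define RING_BUFFER_MAX_NESTING 4
#define MAX_ENTRIES 128

int main(void)
{
	unsigned long per_cpu_bytes = RING_BUFFER_MAX_NESTING
			* MAX_ENTRIES * sizeof(unsigned long);

	/* Prints 4096 on an LP64 host: 4 * 128 * 8. */
	printf("per-CPU callstack buffers: %lu bytes\n", per_cpu_bytes);
	return 0;
}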
@@ -61,20 +62,26 @@
 #include "wrapper/vmalloc.h"
 #include "lttng-tracer.h"
 
-#define MAX_ENTRIES 25
+#define MAX_ENTRIES 128
 
-struct lttng_cs_nesting {
+enum lttng_cs_ctx_modes {
+	CALLSTACK_KERNEL = 0,
+	CALLSTACK_USER = 1,
+	NR_CALLSTACK_MODES,
+};
+
+struct lttng_cs_dispatch {
 	struct stack_trace stack_trace;
 	unsigned long entries[MAX_ENTRIES];
 };
 
 struct lttng_cs {
-	struct lttng_cs_nesting level[RING_BUFFER_MAX_NESTING];
+	struct lttng_cs_dispatch dispatch[RING_BUFFER_MAX_NESTING];
 };
 
 struct field_data {
 	struct lttng_cs __percpu *cs_percpu;
-	int mode;
+	enum lttng_cs_ctx_modes mode;
 };
 
 struct lttng_cs_type {
@@ -83,11 +90,6 @@ struct lttng_cs_type {
 	void (*save_func)(struct stack_trace *trace);
 };
 
-enum lttng_cs_ctx_modes {
-	CALLSTACK_KERNEL = 0,
-	CALLSTACK_USER = 1,
-};
-
 static struct lttng_cs_type cs_types[] = {
 	{
 		.name		= "callstack_kernel",
@@ -102,7 +104,7 @@ static struct lttng_cs_type cs_types[] = {
 };
 
 static
-int init_type(int mode)
+int init_type(enum lttng_cs_ctx_modes mode)
 {
 	unsigned long func;
 
@@ -118,14 +120,26 @@ int init_type(int mode)
 	return 0;
 }
 
+/* Keep track of nesting inside userspace callstack context code */
+DEFINE_PER_CPU(int, callstack_user_nesting);
+
 static
 struct stack_trace *stack_trace_context(struct lttng_ctx_field *field,
 					struct lib_ring_buffer_ctx *ctx)
 {
-	int nesting;
+	int buffer_nesting, cs_user_nesting;
 	struct lttng_cs *cs;
 	struct field_data *fdata = field->priv;
 
+	/*
+	 * Do not gather the userspace callstack context when the event was
+	 * triggered by the userspace callstack context saving mechanism.
+	 */
+	cs_user_nesting = per_cpu(callstack_user_nesting, ctx->cpu);
+
+	if (fdata->mode == CALLSTACK_USER && cs_user_nesting >= 1)
+		return NULL;
+
 	/*
 	 * get_cpu() is not required, preemption is already
 	 * disabled while event is written.
@@ -134,11 +148,11 @@ struct stack_trace *stack_trace_context(struct lttng_ctx_field *field,
 	 * Check it again as a safety net.
	 */
 	cs = per_cpu_ptr(fdata->cs_percpu, ctx->cpu);
-	nesting = per_cpu(lib_ring_buffer_nesting, ctx->cpu) - 1;
-	if (nesting >= RING_BUFFER_MAX_NESTING) {
+	buffer_nesting = per_cpu(lib_ring_buffer_nesting, ctx->cpu) - 1;
+	if (buffer_nesting >= RING_BUFFER_MAX_NESTING)
 		return NULL;
-	}
-	return &cs->level[nesting].stack_trace;
+
+	return &cs->dispatch[buffer_nesting].stack_trace;
 }
 
 /*
@@ -150,20 +164,31 @@ size_t lttng_callstack_get_size(size_t offset, struct lttng_ctx_field *field,
 				struct lib_ring_buffer_ctx *ctx,
 				struct lttng_channel *chan)
 {
-	size_t size = 0;
 	struct stack_trace *trace;
 	struct field_data *fdata = field->priv;
+	size_t orig_offset = offset;
 
 	/* do not write data if no space is available */
 	trace = stack_trace_context(field, ctx);
-	if (!trace)
-		return 0;
+	if (unlikely(!trace)) {
+		offset += lib_ring_buffer_align(offset, lttng_alignof(unsigned int));
+		offset += sizeof(unsigned int);
+		offset += lib_ring_buffer_align(offset, lttng_alignof(unsigned long));
+		return offset - orig_offset;
+	}
 
 	/* reset stack trace, no need to clear memory */
 	trace->nr_entries = 0;
 
+	if (fdata->mode == CALLSTACK_USER)
+		++per_cpu(callstack_user_nesting, ctx->cpu);
+
 	/* do the real work and reserve space */
 	cs_types[fdata->mode].save_func(trace);
+
+	if (fdata->mode == CALLSTACK_USER)
+		per_cpu(callstack_user_nesting, ctx->cpu)--;
+
 	/*
 	 * Remove final ULONG_MAX delimiter. If we cannot find it, add
 	 * our own marker to show that the stack is incomplete. This is
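The callstack_user_nesting counter introduced in the hunks above is a re-entrancy guard: saving the userspace callstack can itself trigger an instrumented event (for instance a page fault while reading the user stack), and without the guard that event would try to save the user callstack again. The following stand-alone sketch shows the same pattern with hypothetical names (save_user_callstack, trace_event, cs_user_nesting as a plain static); the real patch uses the kernel's per-CPU API:

#include <stdio.h>

static int cs_user_nesting;	/* per-CPU in the real patch */

static void trace_event(const char *name);

/* Record the user callstack unless we are already in the middle of
 * doing so, in which case an empty callstack is recorded instead. */
static void save_user_callstack(void)
{
	if (cs_user_nesting >= 1) {
		printf("  re-entrant call: empty callstack recorded\n");
		return;
	}
	cs_user_nesting++;
	trace_event("page_fault");	/* simulated re-entrant event */
	cs_user_nesting--;
}

static void trace_event(const char *name)
{
	printf("event %s: saving user callstack\n", name);
	save_user_callstack();
}

int main(void)
{
	trace_event("sched_switch");
	return 0;
}

The guard makes the nested invocation record nothing rather than recurse, which is why the increment brackets only the save_func() call in lttng_callstack_get_size().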
@@ -173,14 +198,14 @@ size_t lttng_callstack_get_size(size_t offset, struct lttng_ctx_field *field,
 			&& trace->entries[trace->nr_entries - 1] == ULONG_MAX) {
 		trace->nr_entries--;
 	}
-	size += lib_ring_buffer_align(offset, lttng_alignof(unsigned int));
-	size += sizeof(unsigned int);
-	size += lib_ring_buffer_align(offset, lttng_alignof(unsigned long));
-	size += sizeof(unsigned long) * trace->nr_entries;
+	offset += lib_ring_buffer_align(offset, lttng_alignof(unsigned int));
+	offset += sizeof(unsigned int);
+	offset += lib_ring_buffer_align(offset, lttng_alignof(unsigned long));
+	offset += sizeof(unsigned long) * trace->nr_entries;
 	/* Add our own ULONG_MAX delimiter to show incomplete stack. */
 	if (trace->nr_entries == trace->max_entries)
-		size += sizeof(unsigned long);
-	return size;
+		offset += sizeof(unsigned long);
+	return offset - orig_offset;
 }
 
 static
@@ -191,8 +216,13 @@ void lttng_callstack_record(struct lttng_ctx_field *field,
 	struct stack_trace *trace = stack_trace_context(field, ctx);
 	unsigned int nr_seq_entries;
 
-	if (!trace)
+	if (unlikely(!trace)) {
+		nr_seq_entries = 0;
+		lib_ring_buffer_align_ctx(ctx, lttng_alignof(unsigned int));
+		chan->ops->event_write(ctx, &nr_seq_entries, sizeof(unsigned int));
+		lib_ring_buffer_align_ctx(ctx, lttng_alignof(unsigned long));
 		return;
+	}
 	lib_ring_buffer_align_ctx(ctx, lttng_alignof(unsigned int));
 	nr_seq_entries = trace->nr_entries;
 	if (trace->nr_entries == trace->max_entries)
@@ -219,7 +249,7 @@ void field_data_free(struct field_data *fdata)
 }
 
 static
-struct field_data __percpu *field_data_create(int type)
+struct field_data __percpu *field_data_create(enum lttng_cs_ctx_modes mode)
 {
 	int cpu, i;
 	struct lttng_cs __percpu *cs_set;
@@ -238,14 +268,14 @@ struct field_data __percpu *field_data_create(int type)
 
 		cs = per_cpu_ptr(cs_set, cpu);
 		for (i = 0; i < RING_BUFFER_MAX_NESTING; i++) {
-			struct lttng_cs_nesting *level;
+			struct lttng_cs_dispatch *dispatch;
 
-			level = &cs->level[i];
-			level->stack_trace.entries = level->entries;
-			level->stack_trace.max_entries = MAX_ENTRIES;
+			dispatch = &cs->dispatch[i];
+			dispatch->stack_trace.entries = dispatch->entries;
+			dispatch->stack_trace.max_entries = MAX_ENTRIES;
 		}
 	}
-	fdata->mode = type;
+	fdata->mode = mode;
 	return fdata;
 
 error_alloc:
@@ -262,7 +292,8 @@ void lttng_callstack_destroy(struct lttng_ctx_field *field)
 }
 
 static
-int __lttng_add_callstack_generic(struct lttng_ctx **ctx, int mode)
+int __lttng_add_callstack_generic(struct lttng_ctx **ctx,
+		enum lttng_cs_ctx_modes mode)
 {
 	const char *ctx_name = cs_types[mode].name;
 	struct lttng_ctx_field *field;
@@ -336,8 +367,10 @@ int lttng_add_callstack_to_ctx(struct lttng_ctx **ctx, int type)
 	switch (type) {
 	case LTTNG_KERNEL_CONTEXT_CALLSTACK_KERNEL:
 		return __lttng_add_callstack_generic(ctx, CALLSTACK_KERNEL);
+#ifdef CONFIG_X86
 	case LTTNG_KERNEL_CONTEXT_CALLSTACK_USER:
 		return __lttng_add_callstack_generic(ctx, CALLSTACK_USER);
+#endif
 	default:
 		return -EINVAL;
 	}
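A detail worth noting in the hunks above: lttng_callstack_get_size() and lttng_callstack_record() must agree exactly on the number of bytes used, including when no trace is available. The fix therefore reserves space for an empty sequence and writes a zero length into it, instead of reserving zero bytes while the record side stayed silent. Below is a rough user-space model of the size computation, with a hypothetical align_up() helper standing in for lib_ring_buffer_align()/lttng_alignof(); it is a sketch of the layout, not the driver code:

#include <stdio.h>
#include <stddef.h>

/* Round offset up to the next multiple of align (a power of two). */
static size_t align_up(size_t offset, size_t align)
{
	return (offset + align - 1) & ~(align - 1);
}

/* The CTF sequence: an unsigned int length field followed by
 * nr_entries unsigned longs, each naturally aligned. */
static size_t callstack_field_size(size_t offset, unsigned int nr_entries)
{
	size_t orig_offset = offset;

	offset = align_up(offset, sizeof(unsigned int));
	offset += sizeof(unsigned int);
	offset = align_up(offset, sizeof(unsigned long));
	offset += sizeof(unsigned long) * nr_entries;
	return offset - orig_offset;
}

int main(void)
{
	/* Empty sequence (no trace available) vs. a 25-entry callstack. */
	printf("empty: %zu bytes\n", callstack_field_size(0, 0));
	printf("25 entries: %zu bytes\n", callstack_field_size(0, 25));
	return 0;
}

Keeping the empty-sequence path in both functions means a trace reader always finds a length field, and the size reserved in the ring buffer never drifts from the size actually written.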