Fix: callstack context: false-sharing, bad memory size allocation
[lttng-modules.git] / lttng-context-callstack.c
CommitLineData
2fa2d39a
FG
1/*
2 * lttng-context-callstack.c
3 *
4 * LTTng callstack event context.
5 *
6 * Copyright (C) 2014 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
7 * Copyright (C) 2014 Francis Giraldeau <francis.giraldeau@gmail.com>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; only
12 * version 2.1 of the License.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 *
23 * The callstack context can be added to any kernel
24 * event. It records either the kernel or the userspace callstack, up to a
25 * max depth. The context is a CTF sequence, such that it uses only the space
26 * required for the number of callstack entries.
27 *
28 * It allocates per-CPU callstack buffers for up to 4 levels of interrupt
29 * nesting, the same limit as the ring buffer. It therefore uses a fixed
30 * amount of memory, proportional to the number of CPUs:
31 *
32 * size = cpus * nest * depth * sizeof(unsigned long)
33 *
34 * This is about 800 bytes per CPU on a 64-bit host with a depth of 25. The
35 * allocation is done at initialization to avoid memory allocation
36 * overhead while tracing, using a shallow stack.
37 *
38 * The kernel callstack is recovered using save_stack_trace(), and the
39 * userspace callstack uses save_stack_trace_user(). They rely on frame
40 * pointers. These are usually available for the kernel, but the compiler
41 * option -fomit-frame-pointer frequently used in popular Linux distributions
42 * may cause the userspace callstack to be unreliable, and is a known
43 * limitation of this approach. If frame pointers are not available, it
44 * produces no error, but the callstack will be empty. We still provide the
45 * feature, because it works well for runtime environments having frame
46 * pointers. In the future, unwind support and/or last branch record may
47 * provide a solution to this problem.
48 *
49 * The symbol name resolution is left to the trace reader.
50 */
51
52#include <linux/module.h>
53#include <linux/slab.h>
54#include <linux/sched.h>
55#include <linux/utsname.h>
56#include <linux/stacktrace.h>
57#include <linux/spinlock.h>
58#include "lttng-events.h"
59#include "wrapper/ringbuffer/backend.h"
60#include "wrapper/ringbuffer/frontend.h"
61#include "wrapper/vmalloc.h"
62#include "lttng-tracer.h"
63
64cc198b
MD
64#define MAX_ENTRIES 25
65
66struct lttng_cs_nesting {
67 struct stack_trace stack_trace;
68 unsigned long entries[MAX_ENTRIES];
69};
2fa2d39a
FG
70
71struct lttng_cs {
64cc198b 72 struct lttng_cs_nesting level[RING_BUFFER_MAX_NESTING];
2fa2d39a
FG
73};
74
75struct field_data {
2fa2d39a 76 struct lttng_cs __percpu *cs_percpu;
64cc198b 77 int mode;
2fa2d39a
FG
78};
79
/* Description of one callstack flavour. */
struct lttng_cs_type {
	const char *name;		/* context field name */
	const char *save_func_name;	/* symbol name of the stack-saving function */
	void (*save_func)(struct stack_trace *trace);	/* resolved function, or NULL */
};
85
/* Callstack flavours; the values are used as indexes into cs_types[]. */
enum lttng_cs_ctx_modes {
	CALLSTACK_KERNEL = 0,	/* kernel callstack */
	CALLSTACK_USER = 1,	/* userspace callstack */
};
90
91static struct lttng_cs_type cs_types[] = {
92 {
64cc198b
MD
93 .name = "callstack_kernel",
94 .save_func_name = "save_stack_trace",
95 .save_func = NULL,
2fa2d39a
FG
96 },
97 {
64cc198b
MD
98 .name = "callstack_user",
99 .save_func_name = "save_stack_trace_user",
100 .save_func = NULL,
2fa2d39a
FG
101 },
102};
103
104static
105int init_type(int mode)
106{
107 unsigned long func;
108
109 if (cs_types[mode].save_func)
110 return 0;
111 func = kallsyms_lookup_funcptr(cs_types[mode].save_func_name);
112 if (!func) {
113 printk(KERN_WARNING "LTTng: symbol lookup failed: %s\n",
114 cs_types[mode].save_func_name);
115 return -EINVAL;
116 }
117 cs_types[mode].save_func = (void *) func;
118 return 0;
119}
120
121static
122struct stack_trace *stack_trace_context(struct lttng_ctx_field *field,
123 struct lib_ring_buffer_ctx *ctx)
124{
125 int nesting;
126 struct lttng_cs *cs;
3c1a57e8 127 struct field_data *fdata = field->priv;
2fa2d39a
FG
128
129 /*
130 * get_cpu() is not required, preemption is already
131 * disabled while event is written.
132 *
133 * max nesting is checked in lib_ring_buffer_get_cpu().
134 * Check it again as a safety net.
135 */
136 cs = per_cpu_ptr(fdata->cs_percpu, ctx->cpu);
137 nesting = per_cpu(lib_ring_buffer_nesting, ctx->cpu) - 1;
138 if (nesting >= RING_BUFFER_MAX_NESTING) {
139 return NULL;
140 }
64cc198b 141 return &cs->level[nesting].stack_trace;
2fa2d39a
FG
142}
143
144/*
145 * In order to reserve the correct size, the callstack is computed. The
146 * resulting callstack is saved to be accessed in the record step.
147 */
148static
149size_t lttng_callstack_get_size(size_t offset, struct lttng_ctx_field *field,
150 struct lib_ring_buffer_ctx *ctx,
151 struct lttng_channel *chan)
152{
153 size_t size = 0;
154 struct stack_trace *trace;
3c1a57e8 155 struct field_data *fdata = field->priv;
2fa2d39a
FG
156
157 /* do not write data if no space is available */
158 trace = stack_trace_context(field, ctx);
159 if (!trace)
160 return 0;
161
162 /* reset stack trace, no need to clear memory */
163 trace->nr_entries = 0;
164
165 /* do the real work and reserve space */
166 cs_types[fdata->mode].save_func(trace);
ea15538d
MD
167 /*
168 * Remove final ULONG_MAX delimiter. If we cannot find it, add
169 * our own marker to show that the stack is incomplete. This is
170 * more compact for a trace.
171 */
172 if (trace->nr_entries > 0
173 && trace->entries[trace->nr_entries - 1] == ULONG_MAX) {
174 trace->nr_entries--;
175 }
2fa2d39a
FG
176 size += lib_ring_buffer_align(offset, lttng_alignof(unsigned int));
177 size += sizeof(unsigned int);
178 size += lib_ring_buffer_align(offset, lttng_alignof(unsigned long));
179 size += sizeof(unsigned long) * trace->nr_entries;
ea15538d
MD
180 /* Add our own ULONG_MAX delimiter to show incomplete stack. */
181 if (trace->nr_entries == trace->max_entries)
182 size += sizeof(unsigned long);
2fa2d39a
FG
183 return size;
184}
185
186static
187void lttng_callstack_record(struct lttng_ctx_field *field,
64cc198b
MD
188 struct lib_ring_buffer_ctx *ctx,
189 struct lttng_channel *chan)
2fa2d39a
FG
190{
191 struct stack_trace *trace = stack_trace_context(field, ctx);
ea15538d 192 unsigned int nr_seq_entries;
2fa2d39a
FG
193
194 if (!trace)
195 return;
196 lib_ring_buffer_align_ctx(ctx, lttng_alignof(unsigned int));
ea15538d
MD
197 nr_seq_entries = trace->nr_entries;
198 if (trace->nr_entries == trace->max_entries)
199 nr_seq_entries++;
200 chan->ops->event_write(ctx, &nr_seq_entries, sizeof(unsigned int));
2fa2d39a
FG
201 lib_ring_buffer_align_ctx(ctx, lttng_alignof(unsigned long));
202 chan->ops->event_write(ctx, trace->entries,
203 sizeof(unsigned long) * trace->nr_entries);
ea15538d
MD
204 /* Add our own ULONG_MAX delimiter to show incomplete stack. */
205 if (trace->nr_entries == trace->max_entries) {
206 unsigned long delim = ULONG_MAX;
207
208 chan->ops->event_write(ctx, &delim, sizeof(unsigned long));
209 }
2fa2d39a
FG
210}
211
212static
213void field_data_free(struct field_data *fdata)
214{
2fa2d39a
FG
215 if (!fdata)
216 return;
2fa2d39a
FG
217 free_percpu(fdata->cs_percpu);
218 kfree(fdata);
219}
220
221static
64cc198b 222struct field_data __percpu *field_data_create(int type)
2fa2d39a
FG
223{
224 int cpu, i;
2fa2d39a 225 struct lttng_cs __percpu *cs_set;
64cc198b 226 struct field_data *fdata;
2fa2d39a 227
64cc198b 228 fdata = kzalloc(sizeof(*fdata), GFP_KERNEL);
2fa2d39a
FG
229 if (!fdata)
230 return NULL;
231 cs_set = alloc_percpu(struct lttng_cs);
232 if (!cs_set)
233 goto error_alloc;
234
235 fdata->cs_percpu = cs_set;
236 for_each_possible_cpu(cpu) {
64cc198b
MD
237 struct lttng_cs *cs;
238
2fa2d39a
FG
239 cs = per_cpu_ptr(cs_set, cpu);
240 for (i = 0; i < RING_BUFFER_MAX_NESTING; i++) {
64cc198b
MD
241 struct lttng_cs_nesting *level;
242
243 level = &cs->level[i];
244 level->stack_trace.entries = level->entries;
245 level->stack_trace.max_entries = MAX_ENTRIES;
2fa2d39a
FG
246 }
247 }
248 fdata->mode = type;
249 return fdata;
250
251error_alloc:
252 field_data_free(fdata);
253 return NULL;
254}
255
256static
257void lttng_callstack_destroy(struct lttng_ctx_field *field)
258{
3c1a57e8 259 struct field_data *fdata = field->priv;
2fa2d39a
FG
260
261 field_data_free(fdata);
262}
263
264static
265int __lttng_add_callstack_generic(struct lttng_ctx **ctx, int mode)
266{
267 const char *ctx_name = cs_types[mode].name;
268 struct lttng_ctx_field *field;
269 struct field_data *fdata;
270 int ret;
271
272 ret = init_type(mode);
273 if (ret)
274 return ret;
275 field = lttng_append_context(ctx);
276 if (!field)
277 return -ENOMEM;
278 if (lttng_find_context(*ctx, ctx_name)) {
2fa2d39a
FG
279 ret = -EEXIST;
280 goto error_find;
281 }
64cc198b 282 fdata = field_data_create(mode);
2fa2d39a
FG
283 if (!fdata) {
284 ret = -ENOMEM;
285 goto error_create;
286 }
287
288 field->event_field.name = ctx_name;
289 field->event_field.type.atype = atype_sequence;
290 field->event_field.type.u.sequence.elem_type.atype = atype_integer;
291 field->event_field.type.u.sequence.elem_type.u.basic.integer.size = sizeof(unsigned long) * CHAR_BIT;
292 field->event_field.type.u.sequence.elem_type.u.basic.integer.alignment = lttng_alignof(long) * CHAR_BIT;
293 field->event_field.type.u.sequence.elem_type.u.basic.integer.signedness = lttng_is_signed_type(unsigned long);
294 field->event_field.type.u.sequence.elem_type.u.basic.integer.reverse_byte_order = 0;
295 field->event_field.type.u.sequence.elem_type.u.basic.integer.base = 16;
296 field->event_field.type.u.sequence.elem_type.u.basic.integer.encoding = lttng_encode_none;
297
298 field->event_field.type.u.sequence.length_type.atype = atype_integer;
299 field->event_field.type.u.sequence.length_type.u.basic.integer.size = sizeof(unsigned int) * CHAR_BIT;
300 field->event_field.type.u.sequence.length_type.u.basic.integer.alignment = lttng_alignof(unsigned int) * CHAR_BIT;
301 field->event_field.type.u.sequence.length_type.u.basic.integer.signedness = lttng_is_signed_type(unsigned int);
302 field->event_field.type.u.sequence.length_type.u.basic.integer.reverse_byte_order = 0;
303 field->event_field.type.u.sequence.length_type.u.basic.integer.base = 10;
304 field->event_field.type.u.sequence.length_type.u.basic.integer.encoding = lttng_encode_none;
305
306 field->get_size_arg = lttng_callstack_get_size;
307 field->record = lttng_callstack_record;
3c1a57e8 308 field->priv = fdata;
2fa2d39a
FG
309 field->destroy = lttng_callstack_destroy;
310 wrapper_vmalloc_sync_all();
2fa2d39a
FG
311 return 0;
312
313error_create:
314 field_data_free(fdata);
315error_find:
316 lttng_remove_context_field(ctx, field);
317 return ret;
318}
319
320/**
321 * lttng_add_callstack_to_ctx - add callstack event context
322 *
323 * @ctx: the lttng_ctx pointer to initialize
324 * @type: the context type
325 *
326 * Supported callstack type supported:
327 * LTTNG_KERNEL_CONTEXT_CALLSTACK_KERNEL
328 * Records the callstack of the kernel
329 * LTTNG_KERNEL_CONTEXT_CALLSTACK_USER
330 * Records the callstack of the userspace program (from the kernel)
331 *
332 * Return 0 for success, or error code.
333 */
334int lttng_add_callstack_to_ctx(struct lttng_ctx **ctx, int type)
335{
336 switch (type) {
337 case LTTNG_KERNEL_CONTEXT_CALLSTACK_KERNEL:
338 return __lttng_add_callstack_generic(ctx, CALLSTACK_KERNEL);
339 case LTTNG_KERNEL_CONTEXT_CALLSTACK_USER:
340 return __lttng_add_callstack_generic(ctx, CALLSTACK_USER);
341 default:
342 return -EINVAL;
343 }
344}
345EXPORT_SYMBOL_GPL(lttng_add_callstack_to_ctx);
346
347MODULE_LICENSE("GPL and additional rights");
348MODULE_AUTHOR("Francis Giraldeau");
349MODULE_DESCRIPTION("Linux Trace Toolkit Callstack Support");
This page took 0.036373 seconds and 4 git commands to generate.