new ltt-usertrace
[lttv.git] / ltt-usertrace / ltt-usertrace-fast.c
CommitLineData
700d350d 1/* LTTng user-space "fast" library
2 *
3 * This daemon is spawned by each traced thread (to share the mmap).
4 *
5 * Its job is to dump periodically this buffer to disk (when it receives a
6 * SIGUSR1 from its parent).
7 *
8 * It uses the control information in the shared memory area (producer/consumer
9 * count).
10 *
11 * When the parent thread dies (yes, those thing may happen) ;) , this daemon
12 * will flush the last buffer and write it to disk.
13 *
14 * Supplement note for streaming : the daemon is responsible for flushing
15 * periodically the buffer if it is streaming data.
16 *
b09f3215 17 *
700d350d 18 * Notes :
19 * shm memory is typically limited to 4096 units (system wide limit SHMMNI in
20 * /proc/sys/kernel/shmmni). As it requires computation time upon creation, we
21 * do not use it : we will use a shared mmap() instead which is passed through
22 * the fork().
23 * MAP_SHARED mmap segment. Updated when msync or munmap are called.
24 * MAP_ANONYMOUS.
25 * Memory mapped by mmap() is preserved across fork(2), with the same
26 * attributes.
27 *
 28 * Eventually, there will be two modes :
29 * * Slow thread spawn : a fork() is done for each new thread. If the process
30 * dies, the data is not lost.
31 * * Fast thread spawn : a pthread_create() is done by the application for each
32 * new thread.
a85b8f41 33 *
34 * We use a timer to check periodically if the parent died. I think it is less
35 * intrusive than a ptrace() on the parent, which would get every signal. The
36 * side effect of this is that we won't be notified if the parent does an
37 * exec(). In this case, we will just sit there until the parent exits.
38 *
39 *
b09f3215 40 * Copyright 2006 Mathieu Desnoyers
41 *
42 */
43
8a9103df 44#define inline inline __attribute__((always_inline))
45
32f2b04a 46#define _GNU_SOURCE
8b30e7bc 47#define LTT_TRACE
976db1b3 48#define LTT_TRACE_FAST
b09f3215 49#include <sys/types.h>
50#include <sys/wait.h>
51#include <unistd.h>
52#include <stdlib.h>
53#include <stdio.h>
54#include <signal.h>
55#include <syscall.h>
56#include <features.h>
57#include <pthread.h>
58#include <malloc.h>
59#include <string.h>
700d350d 60#include <sys/mman.h>
61#include <signal.h>
77b31f39 62#include <sys/stat.h>
63#include <fcntl.h>
64#include <stdlib.h>
65#include <sys/param.h>
47d7d576 66#include <sys/time.h>
8b30e7bc 67#include <errno.h>
77b31f39 68
32f2b04a 69#include <asm/atomic.h>
77b31f39 70#include <asm/timex.h> //for get_cycles()
b09f3215 71
8b30e7bc 72_syscall0(pid_t,gettid)
73
976db1b3 74#include <ltt/ltt-usertrace.h>
b09f3215 75
8a9103df 76#ifdef LTT_SHOW_DEBUG
fcbe1ea8 77#define dbg_printf(...) printf(__VA_ARGS__)
8a9103df 78#else
79#define dbg_printf(...)
80#endif //LTT_SHOW_DEBUG
81
82
/* Modes for flush_buffer() : FORCE_ACTIVE switches while writers may still be
 * active (also opens the new sub-buffer header and may push the reader);
 * FORCE_FLUSH is used once the traced process is known dead (no writer left). */
enum force_switch_mode { FORCE_ACTIVE, FORCE_FLUSH };
b09f3215 84
e8efa18d 85/* Writer (the traced application) */
b09f3215 86
/* Per-thread pointer to the shared (mmap'd, MAP_SHARED) trace control area;
 * NULL until ltt_rw_init() has run for this thread. */
__thread struct ltt_trace_info *thread_trace_info = NULL;
700d350d 88
e8efa18d 89void ltt_usertrace_fast_buffer_switch(void)
90{
a85b8f41 91 struct ltt_trace_info *tmp = thread_trace_info;
92 if(tmp)
93 kill(tmp->daemon_id, SIGUSR1);
e8efa18d 94}
700d350d 95
77b31f39 96/* The cleanup should never be called from a signal handler */
e8efa18d 97static void ltt_usertrace_fast_cleanup(void *arg)
b09f3215 98{
a85b8f41 99 struct ltt_trace_info *tmp = thread_trace_info;
100 if(tmp) {
101 thread_trace_info = NULL;
102 kill(tmp->daemon_id, SIGUSR2);
103 munmap(tmp, sizeof(*tmp));
104 }
700d350d 105}
b09f3215 106
e8efa18d 107/* Reader (the disk dumper daemon) */
700d350d 108
/* Reader-daemon globals, set once in ltt_usertrace_fast_daemon(). */
static pid_t traced_pid = 0;	/* pid of the traced parent; reset to 0 when it dies */
static pid_t traced_tid = 0;	/* tid of the traced thread that spawned us */
static int parent_exited = 0;	/* set by the SIGUSR2 handler on clean parent exit */
700d350d 112
e8efa18d 113/* signal handling */
/* SIGUSR1 handler : the traced parent requests a buffer switch/read.
 * The body is intentionally trivial : the signal's only job is to interrupt
 * sigsuspend() in the daemon main loop. */
static void handler_sigusr1(int signo)
{
	dbg_printf("LTT Signal %d received : parent buffer switch.\n", signo);
}
118
/* SIGUSR2 handler : the traced parent announced its exit; flag it so the
 * daemon main loop terminates and flushes the remaining data. */
static void handler_sigusr2(int signo)
{
	dbg_printf("LTT Signal %d received : parent exited.\n", signo);
	parent_exited = 1;
}
124
125static void handler_sigalarm(int signo)
126{
8a9103df 127 dbg_printf("LTT Signal %d received\n", signo);
e8efa18d 128
a85b8f41 129 if(getppid() != traced_pid) {
e8efa18d 130 /* Parent died */
8a9103df 131 dbg_printf("LTT Parent %lu died, cleaning up\n", traced_pid);
a85b8f41 132 traced_pid = 0;
e8efa18d 133 }
134 alarm(3);
b09f3215 135}
136
47d7d576 137/* Do a buffer switch. Don't switch if buffer is completely empty */
/* Force a sub-buffer switch on ltt_buf.
 * mode == FORCE_ACTIVE : writers may still be running; besides closing the
 *   old sub-buffer, also open (reserve the header of) the new one and push
 *   the reader when the buffer is full.
 * mode == FORCE_FLUSH : used when no writer is left; only close the old
 *   sub-buffer so it can be delivered and read.
 * Does nothing if the current sub-buffer is empty, or if a timestamp cannot
 * be obtained. Lock-free : the switch is published by a cmpxchg loop on the
 * buffer offset, mirroring the writer-side reservation protocol. */
static void flush_buffer(struct ltt_buf *ltt_buf, enum force_switch_mode mode)
{
	uint64_t tsc;
	int offset_begin, offset_end, offset_old;
	int reserve_commit_diff;
	int consumed_old, consumed_new;
	int commit_count, reserve_count;
	int end_switch_old;

	/* Reservation loop : retried until no concurrent writer moved the
	 * offset between our read and our cmpxchg. */
	do {
		offset_old = atomic_read(&ltt_buf->offset);
		offset_begin = offset_old;
		end_switch_old = 0;
		tsc = ltt_get_timestamp();
		if(tsc == 0) {
			/* Error in getting the timestamp : should not happen : it would
			 * mean we are called from an NMI during a write seqlock on xtime. */
			return;
		}

		if(SUBBUF_OFFSET(offset_begin, ltt_buf) != 0) {
			/* Non-empty sub-buffer : jump to the next sub-buffer boundary. */
			offset_begin = SUBBUF_ALIGN(offset_begin, ltt_buf);
			end_switch_old = 1;
		} else {
			/* we do not have to switch : buffer is empty */
			return;
		}
		if(mode == FORCE_ACTIVE)
			offset_begin += ltt_subbuf_header_len(ltt_buf);
		/* Always begin_switch in FORCE_ACTIVE mode */

		/* Test new buffer integrity */
		reserve_commit_diff =
			atomic_read(
				&ltt_buf->reserve_count[SUBBUF_INDEX(offset_begin, ltt_buf)])
			- atomic_read(
				&ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
		if(reserve_commit_diff == 0) {
			/* Next buffer not corrupted. */
			if(mode == FORCE_ACTIVE
				&& (offset_begin-atomic_read(&ltt_buf->consumed))
				>= ltt_buf->alloc_size) {
				/* We do not overwrite non consumed buffers and we are full : ignore
				   switch while tracing is active. */
				return;
			}
		} else {
			/* Next subbuffer corrupted. Force pushing reader even in normal mode */
		}

		offset_end = offset_begin;
	} while(atomic_cmpxchg(&ltt_buf->offset, offset_old, offset_end)
			!= offset_old);


	if(mode == FORCE_ACTIVE) {
		/* Push the reader if necessary */
		do {
			consumed_old = atomic_read(&ltt_buf->consumed);
			/* If buffer is in overwrite mode, push the reader consumed count if
			   the write position has reached it and we are not at the first
			   iteration (don't push the reader farther than the writer).
			   This operation can be done concurrently by many writers in the
			   same buffer, the writer being at the farthest write position sub-buffer
			   index in the buffer being the one which will win this loop. */
			/* If the buffer is not in overwrite mode, pushing the reader only
			   happen if a sub-buffer is corrupted */
			if((SUBBUF_TRUNC(offset_end, ltt_buf)
					- SUBBUF_TRUNC(consumed_old, ltt_buf))
					>= ltt_buf->alloc_size)
				consumed_new = SUBBUF_ALIGN(consumed_old, ltt_buf);
			else {
				consumed_new = consumed_old;
				break;
			}
		} while(atomic_cmpxchg(&ltt_buf->consumed, consumed_old, consumed_new)
				!= consumed_old);

		if(consumed_old != consumed_new) {
			/* Reader pushed : we are the winner of the push, we can therefore
			   reequilibrate reserve and commit. Atomic increment of the commit
			   count permits other writers to play around with this variable
			   before us. We keep track of corrupted_subbuffers even in overwrite
			   mode :
			   we never want to write over a non completely committed sub-buffer :
			   possible causes : the buffer size is too low compared to the unordered
			   data input, or there is a writer who died between the reserve and the
			   commit. */
			if(reserve_commit_diff) {
				/* We have to alter the sub-buffer commit count : a sub-buffer is
				   corrupted */
				atomic_add(reserve_commit_diff,
						&ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
				atomic_inc(&ltt_buf->corrupted_subbuffers);
			}
		}
	}

	/* Always switch */

	if(end_switch_old) {
		/* old subbuffer */
		/* Concurrency safe because we are the last and only thread to alter this
		   sub-buffer. As long as it is not delivered and read, no other thread can
		   alter the offset, alter the reserve_count or call the
		   client_buffer_end_callback on this sub-buffer.
		   The only remaining threads could be the ones with pending commits. They
		   will have to do the deliver themself.
		   Not concurrency safe in overwrite mode. We detect corrupted subbuffers with
		   commit and reserve counts. We keep a corrupted sub-buffers count and push
		   the readers across these sub-buffers.
		   Not concurrency safe if a writer is stalled in a subbuffer and
		   another writer switches in, finding out it's corrupted. The result will be
		   that the old (uncommitted) subbuffer will be declared corrupted, and that
		   the new subbuffer will be declared corrupted too because of the commit
		   count adjustment.
		   Offset old should never be 0. */
		ltt_buffer_end_callback(ltt_buf, tsc, offset_old,
				SUBBUF_INDEX((offset_old), ltt_buf));
		/* Setting this reserve_count will allow the sub-buffer to be delivered by
		   the last committer. */
		reserve_count = atomic_add_return((SUBBUF_OFFSET((offset_old-1),
						ltt_buf) + 1),
				&ltt_buf->reserve_count[SUBBUF_INDEX((offset_old),
						ltt_buf)]);
		/* If reserve and commit already match, we are that last committer :
		 * deliver the old sub-buffer ourselves. */
		if(reserve_count == atomic_read(
				&ltt_buf->commit_count[SUBBUF_INDEX((offset_old), ltt_buf)])) {
			ltt_deliver_callback(ltt_buf, SUBBUF_INDEX((offset_old), ltt_buf), NULL);
		}
	}

	if(mode == FORCE_ACTIVE) {
		/* New sub-buffer */
		/* This code can be executed unordered : writers may already have written
		   to the sub-buffer before this code gets executed, caution. */
		/* The commit makes sure that this code is executed before the deliver
		   of this sub-buffer */
		ltt_buffer_begin_callback(ltt_buf, tsc, SUBBUF_INDEX(offset_begin, ltt_buf));
		commit_count = atomic_add_return(ltt_subbuf_header_len(ltt_buf),
				&ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
		/* Check if the written buffer has to be delivered */
		if(commit_count == atomic_read(
				&ltt_buf->reserve_count[SUBBUF_INDEX(offset_begin, ltt_buf)])) {
			ltt_deliver_callback(ltt_buf, SUBBUF_INDEX(offset_begin, ltt_buf), NULL);
		}
	}

}
286
/* Try to reserve the oldest sub-buffer for reading.
 * On success returns 0 and stores the current consumed offset in *offset.
 * Returns -EAGAIN when the oldest sub-buffer is not fully committed yet, or
 * when there is no complete sub-buffer to read.
 * Does not advance the consumed count : ltt_buffer_put() does that. */
static inline int ltt_buffer_get(struct ltt_buf *ltt_buf,
		unsigned int *offset)
{
	unsigned int consumed_old, consumed_idx;
	consumed_old = atomic_read(&ltt_buf->consumed);
	consumed_idx = SUBBUF_INDEX(consumed_old, ltt_buf);

	/* Readable only once the commit count caught up with the reserve count :
	 * otherwise some writer still has a pending commit in this sub-buffer. */
	if(atomic_read(&ltt_buf->commit_count[consumed_idx])
		!= atomic_read(&ltt_buf->reserve_count[consumed_idx])) {
		return -EAGAIN;
	}
	/* No complete sub-buffer between the consumed and the write position. */
	if((SUBBUF_TRUNC(atomic_read(&ltt_buf->offset), ltt_buf)
				-SUBBUF_TRUNC(consumed_old, ltt_buf)) == 0) {
		return -EAGAIN;
	}

	*offset = consumed_old;

	return 0;
}
307
308static inline int ltt_buffer_put(struct ltt_buf *ltt_buf,
309 unsigned int offset)
310{
311 unsigned int consumed_old, consumed_new;
312 int ret;
313
314 consumed_old = offset;
315 consumed_new = SUBBUF_ALIGN(consumed_old, ltt_buf);
316 if(atomic_cmpxchg(&ltt_buf->consumed, consumed_old, consumed_new)
317 != consumed_old) {
318 /* We have been pushed by the writer : the last buffer read _is_
319 * corrupted!
320 * It can also happen if this is a buffer we never got. */
321 return -EIO;
322 } else {
e0cd021d 323 if(traced_pid == 0 || parent_exited) return 0;
324
85b94320 325 ret = sem_post(&ltt_buf->writer_sem);
326 if(ret < 0) {
327 printf("error in sem_post");
47d7d576 328 }
329 }
330}
331
/* Read one complete sub-buffer from ltt_buf and write it to fd.
 * Returns 0 on success (the caller loops to drain the channel), -EAGAIN when
 * nothing is readable, -EIO when the writer pushed us (last sub-buffer read
 * was corrupted). On the success path the returned value is whatever
 * ltt_buffer_put() returned, reached by falling through write_error. */
static int read_subbuffer(struct ltt_buf *ltt_buf, int fd)
{
	unsigned int consumed_old;
	int err;
	dbg_printf("LTT read buffer\n");


	err = ltt_buffer_get(ltt_buf, &consumed_old);
	if(err != 0) {
		if(err != -EAGAIN) dbg_printf("LTT Reserving sub buffer failed\n");
		goto get_error;
	}

	/* Locate the sub-buffer by masking the consumed offset into the circular
	 * buffer (alloc_size is a power of two), then write it out whole. */
	err = TEMP_FAILURE_RETRY(write(fd,
			ltt_buf->start
				+ (consumed_old & ((ltt_buf->alloc_size)-1)),
			ltt_buf->subbuf_size));

	if(err < 0) {
		perror("Error in writing to file");
		goto write_error;
	}
#if 0
	err = fsync(pair->trace);
	if(err < 0) {
		ret = errno;
		perror("Error in writing to file");
		goto write_error;
	}
#endif //0
write_error:
	/* Always release the sub-buffer, even when the write failed. */
	err = ltt_buffer_put(ltt_buf, consumed_old);

	if(err != 0) {
		if(err == -EIO) {
			dbg_printf("Reader has been pushed by the writer, last subbuffer corrupted.\n");
			/* FIXME : we may delete the last written buffer if we wish. */
		}
		goto get_error;
	}

get_error:
	return err;
}
e8efa18d 376
a85b8f41 377/* This function is called by ltt_rw_init which has signals blocked */
/* Main loop of the disk-dumper daemon (runs in the child after the fork in
 * ltt_rw_init(), which calls us with all signals blocked; oldset is the mask
 * to restore while waiting in sigsuspend()).
 * Installs SIGUSR1 (buffer switch request), SIGUSR2 (parent exited) and
 * SIGALRM (parent liveness poll) handlers, creates the per-thread output
 * file under LTT_USERTRACE_ROOT, then drains the process channel each time a
 * signal arrives. When the parent is gone, performs a final forced flush,
 * closes everything and exits — never returns.
 * NOTE(review): l_traced_tid is declared pthread_t, but the caller passes a
 * pid_t from gettid() and it is stored into pid_t traced_tid — confirm.
 * NOTE(review): several "%lu" specifiers below receive pid_t/int values —
 * mismatched varargs types on common platforms; verify. */
static void ltt_usertrace_fast_daemon(struct ltt_trace_info *shared_trace_info,
		sigset_t oldset, pid_t l_traced_pid, pthread_t l_traced_tid)
{
	struct sigaction act;
	int ret;
	int fd_process = -1;
	char outfile_name[PATH_MAX];
	char identifier_name[PATH_MAX];

	traced_pid = l_traced_pid;
	traced_tid = l_traced_tid;

	dbg_printf("LTT ltt_usertrace_fast_daemon : init is %d, pid is %lu, traced_pid is %lu, traced_tid is %lu\n",
			shared_trace_info->init, getpid(), traced_pid, traced_tid);

	/* Install the three signal handlers, each blocking itself while running. */
	act.sa_handler = handler_sigusr1;
	act.sa_flags = 0;
	sigemptyset(&(act.sa_mask));
	sigaddset(&(act.sa_mask), SIGUSR1);
	sigaction(SIGUSR1, &act, NULL);

	act.sa_handler = handler_sigusr2;
	act.sa_flags = 0;
	sigemptyset(&(act.sa_mask));
	sigaddset(&(act.sa_mask), SIGUSR2);
	sigaction(SIGUSR2, &act, NULL);

	act.sa_handler = handler_sigalarm;
	act.sa_flags = 0;
	sigemptyset(&(act.sa_mask));
	sigaddset(&(act.sa_mask), SIGALRM);
	sigaction(SIGALRM, &act, NULL);

	/* Arm the parent-liveness poll (see handler_sigalarm). */
	alarm(3);

	/* Open output files */
	umask(00000);
	ret = mkdir(LTT_USERTRACE_ROOT, 0777);
	if(ret < 0 && errno != EEXIST) {
		perror("LTT Error in creating output (mkdir)");
		exit(-1);
	}
	ret = chdir(LTT_USERTRACE_ROOT);
	if(ret < 0) {
		perror("LTT Error in creating output (chdir)");
		exit(-1);
	}
	/* File name : <tid>.<pid>.<cycle counter> for uniqueness. */
	snprintf(identifier_name, PATH_MAX-1, "%lu.%lu.%llu",
			traced_tid, traced_pid, get_cycles());
	snprintf(outfile_name, PATH_MAX-1, "process-%s", identifier_name);

	/* Wait for the first signal before creating files */
	/* sigsuspend() always returns -1; anything else is an error. */
	ret = sigsuspend(&oldset);
	if(ret != -1) {
		perror("LTT Error in sigsuspend\n");
	}

#ifndef LTT_NULL_OUTPUT_TEST
	fd_process = creat(outfile_name, 0644);
#else
	/* NULL test : discard all output through a /dev/null symlink. */
	ret = symlink("/dev/null", outfile_name);
	if(ret < 0) {
		perror("error in symlink");
	}
	fd_process = open(outfile_name, O_WRONLY);
	if(fd_process < 0) {
		perror("Error in open");
	}
#endif //LTT_NULL_OUTPUT_TEST

	/* Main loop : drain the channel on each wakeup until the parent is gone
	 * (detected either by the SIGALRM poll or by SIGUSR2). */
	while(1) {
		if(traced_pid == 0) break; /* parent died */
		if(parent_exited) break;
		dbg_printf("LTT Doing a buffer switch read. pid is : %lu\n", getpid());

		do {
			ret = read_subbuffer(&shared_trace_info->channel.process, fd_process);
		} while(ret == 0);

		ret = sigsuspend(&oldset);
		if(ret != -1) {
			perror("LTT Error in sigsuspend\n");
		}
	}
	/* The parent thread is dead and we have finished with the buffer */

	/* Buffer force switch (flush). Using FLUSH instead of ACTIVE because we know
	 * there is no writer. */
	flush_buffer(&shared_trace_info->channel.process, FORCE_FLUSH);
	do {
		ret = read_subbuffer(&shared_trace_info->channel.process, fd_process);
	} while(ret == 0);

	if(fd_process != -1)
		close(fd_process);

	ret = sem_destroy(&shared_trace_info->channel.process.writer_sem);
	if(ret < 0) {
		perror("error in sem_destroy");
	}
	munmap(shared_trace_info, sizeof(*shared_trace_info));

	exit(0);
}
b09f3215 483
e8efa18d 484
485/* Reader-writer initialization */
486
/* Role of this process : the traced application is the writer; the forked
 * disk-dumper daemon switches itself to reader in ltt_rw_init(). Checked by
 * the destructor so only writers run the cleanup path. */
static enum ltt_process_role { LTT_ROLE_WRITER, LTT_ROLE_READER }
	role = LTT_ROLE_WRITER;
489
490
491void ltt_rw_init(void)
b09f3215 492{
700d350d 493 pid_t pid;
494 struct ltt_trace_info *shared_trace_info;
495 int ret;
496 sigset_t set, oldset;
a85b8f41 497 pid_t l_traced_pid = getpid();
4c992ad5 498 pid_t l_traced_tid = gettid();
700d350d 499
500 /* parent : create the shared memory map */
a85b8f41 501 shared_trace_info = mmap(0, sizeof(*thread_trace_info),
700d350d 502 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, 0, 0);
8a9103df 503 shared_trace_info->init=0;
504 shared_trace_info->filter=0;
505 shared_trace_info->daemon_id=0;
506 shared_trace_info->nesting=0;
b402c055 507 memset(&shared_trace_info->channel.process, 0,
508 sizeof(shared_trace_info->channel.process));
85b94320 509 //Need NPTL!
510 ret = sem_init(&shared_trace_info->channel.process.writer_sem, 1,
511 LTT_N_SUBBUFS);
512 if(ret < 0) {
513 perror("error in sem_init");
514 }
b402c055 515 shared_trace_info->channel.process.alloc_size = LTT_BUF_SIZE_PROCESS;
516 shared_trace_info->channel.process.subbuf_size = LTT_SUBBUF_SIZE_PROCESS;
517 shared_trace_info->channel.process.start =
518 shared_trace_info->channel.process_buf;
519 ltt_buffer_begin_callback(&shared_trace_info->channel.process,
5ffa9d14 520 ltt_get_timestamp(), 0);
521
a85b8f41 522 shared_trace_info->init = 1;
700d350d 523
524 /* Disable signals */
525 ret = sigfillset(&set);
526 if(ret) {
8a9103df 527 dbg_printf("LTT Error in sigfillset\n");
700d350d 528 }
529
700d350d 530 ret = pthread_sigmask(SIG_BLOCK, &set, &oldset);
531 if(ret) {
8a9103df 532 dbg_printf("LTT Error in pthread_sigmask\n");
700d350d 533 }
a85b8f41 534
700d350d 535 pid = fork();
536 if(pid > 0) {
537 /* Parent */
a85b8f41 538 shared_trace_info->daemon_id = pid;
539 thread_trace_info = shared_trace_info;
700d350d 540
541 /* Enable signals */
542 ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
543 if(ret) {
8a9103df 544 dbg_printf("LTT Error in pthread_sigmask\n");
700d350d 545 }
546 } else if(pid == 0) {
b5d612cb 547 pid_t sid;
700d350d 548 /* Child */
e8efa18d 549 role = LTT_ROLE_READER;
b5d612cb 550 sid = setsid();
b402c055 551 //Not a good idea to renice, unless futex wait eventually implement
552 //priority inheritence.
553 //ret = nice(1);
554 //if(ret < 0) {
555 // perror("Error in nice");
556 //}
b5d612cb 557 if(sid < 0) {
558 perror("Error setting sid");
559 }
77b31f39 560 ltt_usertrace_fast_daemon(shared_trace_info, oldset, l_traced_pid,
4c992ad5 561 l_traced_tid);
700d350d 562 /* Should never return */
563 exit(-1);
564 } else if(pid < 0) {
565 /* fork error */
a35eaa9c 566 perror("LTT Error in forking ltt-usertrace-fast");
700d350d 567 }
b09f3215 568}
569
/* Per-thread buffer used by the pthread cleanup handler registered below. */
static __thread struct _pthread_cleanup_buffer cleanup_buffer;

/* Entry point for additional instrumented threads : register the cleanup
 * handler (notifies and detaches the daemon on thread exit) then spawn the
 * daemon for this thread. */
void ltt_thread_init(void)
{
	_pthread_cleanup_push(&cleanup_buffer, ltt_usertrace_fast_cleanup, NULL);
	ltt_rw_init();
}
577
/* Library constructor : spawn the tracing daemon for the initial thread. */
void __attribute__((constructor)) __ltt_usertrace_fast_init(void)
{
	dbg_printf("LTT usertrace-fast init\n");

	ltt_rw_init();
}
584
585void __attribute__((destructor)) __ltt_usertrace_fast_fini(void)
586{
e8efa18d 587 if(role == LTT_ROLE_WRITER) {
8a9103df 588 dbg_printf("LTT usertrace-fast fini\n");
e8efa18d 589 ltt_usertrace_fast_cleanup(NULL);
590 }
b09f3215 591}
592
This page took 0.049045 seconds and 4 git commands to generate.