update quickstart
[lttv.git] / ltt-usertrace / ltt-usertrace-fast.c
CommitLineData
700d350d 1/* LTTng user-space "fast" library
2 *
3 * This daemon is spawned by each traced thread (to share the mmap).
4 *
5 * Its job is to dump periodically this buffer to disk (when it receives a
6 * SIGUSR1 from its parent).
7 *
8 * It uses the control information in the shared memory area (producer/consumer
9 * count).
10 *
11 * When the parent thread dies (yes, those things may happen) ;) , this daemon
12 * will flush the last buffer and write it to disk.
13 *
14 * Supplement note for streaming : the daemon is responsible for flushing
15 * periodically the buffer if it is streaming data.
16 *
b09f3215 17 *
700d350d 18 * Notes :
19 * shm memory is typically limited to 4096 units (system wide limit SHMMNI in
20 * /proc/sys/kernel/shmmni). As it requires computation time upon creation, we
21 * do not use it : we will use a shared mmap() instead which is passed through
22 * the fork().
23 * MAP_SHARED mmap segment. Updated when msync or munmap are called.
24 * MAP_ANONYMOUS.
25 * Memory mapped by mmap() is preserved across fork(2), with the same
26 * attributes.
27 *
28 * Eventually, there will be two modes :
29 * * Slow thread spawn : a fork() is done for each new thread. If the process
30 * dies, the data is not lost.
31 * * Fast thread spawn : a pthread_create() is done by the application for each
32 * new thread.
a85b8f41 33 *
34 * We use a timer to check periodically if the parent died. I think it is less
35 * intrusive than a ptrace() on the parent, which would get every signal. The
36 * side effect of this is that we won't be notified if the parent does an
37 * exec(). In this case, we will just sit there until the parent exits.
38 *
39 *
b09f3215 40 * Copyright 2006 Mathieu Desnoyers
41 *
42 */
43
8a9103df 44#define inline inline __attribute__((always_inline))
45
32f2b04a 46#define _GNU_SOURCE
8b30e7bc 47#define LTT_TRACE
976db1b3 48#define LTT_TRACE_FAST
b09f3215 49#include <unistd.h>
50#include <stdlib.h>
51#include <stdio.h>
52#include <signal.h>
53#include <syscall.h>
54#include <features.h>
55#include <pthread.h>
56#include <malloc.h>
57#include <string.h>
700d350d 58#include <signal.h>
77b31f39 59#include <fcntl.h>
60#include <stdlib.h>
4359c2bb 61#include <errno.h>
77b31f39 62#include <sys/param.h>
47d7d576 63#include <sys/time.h>
4359c2bb 64#include <sys/types.h>
65#include <sys/wait.h>
66#include <sys/stat.h>
67#include <sys/mman.h>
68#include <linux/unistd.h>
77b31f39 69
d86395c3 70// included with hack for powerpc in ltt-usertrace.h #include <asm/atomic.h>
77b31f39 71#include <asm/timex.h> //for get_cycles()
b09f3215 72
976db1b3 73#include <ltt/ltt-usertrace.h>
b09f3215 74
/* Generate the gettid() system call wrapper used by ltt_rw_init() to record
 * the traced thread id.
 * NOTE(review): _syscall0 presumably comes from <linux/unistd.h>; recent
 * kernel headers may no longer provide it - confirm before building on a
 * modern toolchain. */
_syscall0(pid_t,gettid);
76
/* Debug tracing helper.
 * With LTT_SHOW_DEBUG defined, dbg_printf() forwards its arguments to
 * printf(). Otherwise it expands to an empty statement : the arguments are
 * not evaluated, and the do/while form keeps the call a single statement that
 * still needs its trailing semicolon (safe in unbraced if/else bodies). */
#ifdef LTT_SHOW_DEBUG
#define dbg_printf(...) printf(__VA_ARGS__)
#else
#define dbg_printf(...) do { } while(0)
#endif //LTT_SHOW_DEBUG
82
83
/* Selects how flush_buffer() performs a forced sub-buffer switch. */
enum force_switch_mode {
	FORCE_ACTIVE,	/* switch while tracing is still active */
	FORCE_FLUSH	/* final flush : only close the current sub-buffer */
};
b09f3215 85
e8efa18d 86/* Writer (the traced application) */
b09f3215 87
/* Per-thread pointer to the trace information shared with this thread's dump
 * daemon. NULL until ltt_rw_init() has successfully forked the daemon, and
 * reset to NULL by ltt_usertrace_fast_cleanup(). */
__thread struct ltt_trace_info *thread_trace_info = NULL;
700d350d 89
e8efa18d 90void ltt_usertrace_fast_buffer_switch(void)
91{
a85b8f41 92 struct ltt_trace_info *tmp = thread_trace_info;
93 if(tmp)
94 kill(tmp->daemon_id, SIGUSR1);
e8efa18d 95}
700d350d 96
77b31f39 97/* The cleanup should never be called from a signal handler */
e8efa18d 98static void ltt_usertrace_fast_cleanup(void *arg)
b09f3215 99{
a85b8f41 100 struct ltt_trace_info *tmp = thread_trace_info;
101 if(tmp) {
102 thread_trace_info = NULL;
103 kill(tmp->daemon_id, SIGUSR2);
104 munmap(tmp, sizeof(*tmp));
105 }
700d350d 106}
b09f3215 107
e8efa18d 108/* Reader (the disk dumper daemon) */
700d350d 109
/* State of the dump daemon (reader side).
 *
 * traced_pid and parent_exited are modified from signal handlers
 * (handler_sigalarm, handler_sigusr2) and polled by the daemon main loop, so
 * they are volatile; parent_exited is a plain flag and uses sig_atomic_t. */
static volatile pid_t traced_pid = 0;	/* 0 once the parent is known dead */
static pid_t traced_tid = 0;
static volatile sig_atomic_t parent_exited = 0;
static int fd_process = -1;		/* output file, -1 until opened */
static char outfile_name[PATH_MAX];
static char identifier_name[PATH_MAX];
700d350d 116
e8efa18d 117/* signal handling */
/* SIGUSR1 handler : the traced thread asks for a buffer read. The handler
 * carries no state of its own; it only emits a debug trace. */
static void handler_sigusr1(int signo)
{
	(void)signo;	/* only referenced when debug output is compiled in */
	dbg_printf("LTT Signal %d received : parent buffer switch.\n", signo);
}
122
123static void handler_sigusr2(int signo)
124{
8a9103df 125 dbg_printf("LTT Signal %d received : parent exited.\n", signo);
e8efa18d 126 parent_exited = 1;
127}
128
129static void handler_sigalarm(int signo)
130{
8a9103df 131 dbg_printf("LTT Signal %d received\n", signo);
e8efa18d 132
a85b8f41 133 if(getppid() != traced_pid) {
e8efa18d 134 /* Parent died */
8a9103df 135 dbg_printf("LTT Parent %lu died, cleaning up\n", traced_pid);
a85b8f41 136 traced_pid = 0;
e8efa18d 137 }
138 alarm(3);
b09f3215 139}
140
/* Do a buffer switch. Don't switch if buffer is completely empty.
 *
 * ltt_buf : buffer whose current sub-buffer must be closed.
 * mode    : FORCE_ACTIVE also reserves the header of the next sub-buffer,
 *           pushes the reader when needed and writes the new sub-buffer
 *           header; FORCE_FLUSH only closes the current sub-buffer.
 *
 * Returns without doing anything when the timestamp cannot be read, when the
 * write offset already sits on a sub-buffer boundary, or (FORCE_ACTIVE only)
 * when the buffer is full of non consumed data. */
static void flush_buffer(struct ltt_buf *ltt_buf, enum force_switch_mode mode)
{
	uint64_t tsc;
	int offset_begin, offset_end, offset_old;
	int reserve_commit_diff;
	int consumed_old, consumed_new;
	int commit_count, reserve_count;
	int end_switch_old;

	/* Compute the new write offset and publish it with a compare-and-swap;
	 * retry from scratch if another writer moved the offset meanwhile. */
	do {
		offset_old = atomic_read(&ltt_buf->offset);
		offset_begin = offset_old;
		end_switch_old = 0;
		tsc = ltt_get_timestamp();
		if(tsc == 0) {
			/* Error in getting the timestamp : should not happen : it would
			 * mean we are called from an NMI during a write seqlock on xtime. */
			return;
		}

		if(SUBBUF_OFFSET(offset_begin, ltt_buf) != 0) {
			offset_begin = SUBBUF_ALIGN(offset_begin, ltt_buf);
			end_switch_old = 1;
		} else {
			/* we do not have to switch : buffer is empty */
			return;
		}
		if(mode == FORCE_ACTIVE)
			offset_begin += ltt_subbuf_header_len(ltt_buf);
		/* Always begin_switch in FORCE_ACTIVE mode */

		/* Test new buffer integrity */
		reserve_commit_diff =
			atomic_read(
				&ltt_buf->reserve_count[SUBBUF_INDEX(offset_begin, ltt_buf)])
			- atomic_read(
				&ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
		if(reserve_commit_diff == 0) {
			/* Next buffer not corrupted. */
			if(mode == FORCE_ACTIVE
				&& (offset_begin-atomic_read(&ltt_buf->consumed))
					>= ltt_buf->alloc_size) {
				/* We do not overwrite non consumed buffers and we are full : ignore
				   switch while tracing is active. */
				return;
			}
		} else {
			/* Next subbuffer corrupted. Force pushing reader even in normal mode */
		}

		offset_end = offset_begin;
	} while(atomic_cmpxchg(&ltt_buf->offset, offset_old, offset_end)
			!= offset_old);


	if(mode == FORCE_ACTIVE) {
		/* Push the reader if necessary */
		do {
			consumed_old = atomic_read(&ltt_buf->consumed);
			/* If buffer is in overwrite mode, push the reader consumed count if
			   the write position has reached it and we are not at the first
			   iteration (don't push the reader farther than the writer).
			   This operation can be done concurrently by many writers in the
			   same buffer, the writer being at the farthest write position
			   sub-buffer index in the buffer being the one which will win this
			   loop. */
			/* If the buffer is not in overwrite mode, pushing the reader only
			   happens if a sub-buffer is corrupted */
			if((SUBBUF_TRUNC(offset_end-1, ltt_buf)
					- SUBBUF_TRUNC(consumed_old, ltt_buf))
					>= ltt_buf->alloc_size)
				consumed_new = SUBBUF_ALIGN(consumed_old, ltt_buf);
			else {
				/* No push needed : leave with consumed_new == consumed_old so
				   the test below sees that nothing was pushed. */
				consumed_new = consumed_old;
				break;
			}
		} while(atomic_cmpxchg(&ltt_buf->consumed, consumed_old, consumed_new)
				!= consumed_old);

		if(consumed_old != consumed_new) {
			/* Reader pushed : we are the winner of the push, we can therefore
			   reequilibrate reserve and commit. Atomic increment of the commit
			   count permits other writers to play around with this variable
			   before us. We keep track of corrupted_subbuffers even in overwrite
			   mode :
			   we never want to write over a non completely committed sub-buffer :
			   possible causes : the buffer size is too low compared to the unordered
			   data input, or there is a writer who died between the reserve and the
			   commit. */
			if(reserve_commit_diff) {
				/* We have to alter the sub-buffer commit count : a sub-buffer is
				   corrupted */
				atomic_add(reserve_commit_diff,
					&ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
				atomic_inc(&ltt_buf->corrupted_subbuffers);
			}
		}
	}

	/* Always switch */

	if(end_switch_old) {
		/* old subbuffer */
		/* Concurrency safe because we are the last and only thread to alter this
		   sub-buffer. As long as it is not delivered and read, no other thread can
		   alter the offset, alter the reserve_count or call the
		   client_buffer_end_callback on this sub-buffer.
		   The only remaining threads could be the ones with pending commits. They
		   will have to do the deliver themselves.
		   Not concurrency safe in overwrite mode. We detect corrupted subbuffers with
		   commit and reserve counts. We keep a corrupted sub-buffers count and push
		   the readers across these sub-buffers.
		   Not concurrency safe if a writer is stalled in a subbuffer and
		   another writer switches in, finding out it's corrupted. The result will be
		   that the old (uncommitted) subbuffer will be declared corrupted, and that
		   the new subbuffer will be declared corrupted too because of the commit
		   count adjustment.
		   Offset old should never be 0. */
		ltt_buffer_end_callback(ltt_buf, tsc, offset_old,
				SUBBUF_INDEX((offset_old), ltt_buf));
		/* Setting this reserve_count will allow the sub-buffer to be delivered by
		   the last committer. */
		reserve_count = atomic_add_return((SUBBUF_OFFSET((offset_old-1),
							ltt_buf) + 1),
				&ltt_buf->reserve_count[SUBBUF_INDEX((offset_old),
							ltt_buf)]);
		/* Everything reserved is already committed : deliver it ourselves. */
		if(reserve_count == atomic_read(
				&ltt_buf->commit_count[SUBBUF_INDEX((offset_old), ltt_buf)])) {
			ltt_deliver_callback(ltt_buf, SUBBUF_INDEX((offset_old), ltt_buf), NULL);
		}
	}

	if(mode == FORCE_ACTIVE) {
		/* New sub-buffer */
		/* This code can be executed unordered : writers may already have written
		   to the sub-buffer before this code gets executed, caution. */
		/* The commit makes sure that this code is executed before the deliver
		   of this sub-buffer */
		ltt_buffer_begin_callback(ltt_buf, tsc, SUBBUF_INDEX(offset_begin, ltt_buf));
		commit_count = atomic_add_return(ltt_subbuf_header_len(ltt_buf),
				&ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
		/* Check if the written buffer has to be delivered */
		if(commit_count == atomic_read(
				&ltt_buf->reserve_count[SUBBUF_INDEX(offset_begin, ltt_buf)])) {
			ltt_deliver_callback(ltt_buf, SUBBUF_INDEX(offset_begin, ltt_buf), NULL);
		}
	}

}
290
c095a20d 291
292static int open_output_files(void)
293{
294 int ret;
295 int fd;
296 /* Open output files */
297 umask(00000);
298 ret = mkdir(LTT_USERTRACE_ROOT, 0777);
299 if(ret < 0 && errno != EEXIST) {
300 perror("LTT Error in creating output (mkdir)");
301 exit(-1);
302 }
303 ret = chdir(LTT_USERTRACE_ROOT);
304 if(ret < 0) {
305 perror("LTT Error in creating output (chdir)");
306 exit(-1);
307 }
308 snprintf(identifier_name, PATH_MAX-1, "%lu.%lu.%llu",
309 traced_tid, traced_pid, get_cycles());
310 snprintf(outfile_name, PATH_MAX-1, "process-%s", identifier_name);
311
312#ifndef LTT_NULL_OUTPUT_TEST
313 fd = creat(outfile_name, 0644);
314#else
315 /* NULL test */
316 ret = symlink("/dev/null", outfile_name);
317 if(ret < 0) {
318 perror("error in symlink");
319 exit(-1);
320 }
321 fd = open(outfile_name, O_WRONLY);
322 if(fd_process < 0) {
323 perror("Error in open");
324 exit(-1);
325 }
326#endif //LTT_NULL_OUTPUT_TEST
327 return fd;
328}
329
47d7d576 330static inline int ltt_buffer_get(struct ltt_buf *ltt_buf,
331 unsigned int *offset)
332{
333 unsigned int consumed_old, consumed_idx;
334 consumed_old = atomic_read(&ltt_buf->consumed);
335 consumed_idx = SUBBUF_INDEX(consumed_old, ltt_buf);
336
337 if(atomic_read(&ltt_buf->commit_count[consumed_idx])
338 != atomic_read(&ltt_buf->reserve_count[consumed_idx])) {
339 return -EAGAIN;
340 }
341 if((SUBBUF_TRUNC(atomic_read(&ltt_buf->offset), ltt_buf)
342 -SUBBUF_TRUNC(consumed_old, ltt_buf)) == 0) {
343 return -EAGAIN;
344 }
345
346 *offset = consumed_old;
347
348 return 0;
349}
350
351static inline int ltt_buffer_put(struct ltt_buf *ltt_buf,
352 unsigned int offset)
353{
354 unsigned int consumed_old, consumed_new;
355 int ret;
356
357 consumed_old = offset;
358 consumed_new = SUBBUF_ALIGN(consumed_old, ltt_buf);
359 if(atomic_cmpxchg(&ltt_buf->consumed, consumed_old, consumed_new)
360 != consumed_old) {
361 /* We have been pushed by the writer : the last buffer read _is_
362 * corrupted!
363 * It can also happen if this is a buffer we never got. */
364 return -EIO;
365 } else {
e0cd021d 366 if(traced_pid == 0 || parent_exited) return 0;
367
85b94320 368 ret = sem_post(&ltt_buf->writer_sem);
369 if(ret < 0) {
370 printf("error in sem_post");
47d7d576 371 }
372 }
373}
374
47d7d576 375static int read_subbuffer(struct ltt_buf *ltt_buf, int fd)
376{
32f2b04a 377 unsigned int consumed_old;
47d7d576 378 int err;
8a9103df 379 dbg_printf("LTT read buffer\n");
47d7d576 380
381
32f2b04a 382 err = ltt_buffer_get(ltt_buf, &consumed_old);
5ffa9d14 383 if(err != 0) {
8a9103df 384 if(err != -EAGAIN) dbg_printf("LTT Reserving sub buffer failed\n");
47d7d576 385 goto get_error;
386 }
c095a20d 387 if(fd_process == -1) {
388 fd_process = fd = open_output_files();
389 }
47d7d576 390
391 err = TEMP_FAILURE_RETRY(write(fd,
392 ltt_buf->start
393 + (consumed_old & ((ltt_buf->alloc_size)-1)),
394 ltt_buf->subbuf_size));
395
396 if(err < 0) {
397 perror("Error in writing to file");
398 goto write_error;
399 }
400#if 0
401 err = fsync(pair->trace);
402 if(err < 0) {
403 ret = errno;
404 perror("Error in writing to file");
405 goto write_error;
406 }
407#endif //0
408write_error:
32f2b04a 409 err = ltt_buffer_put(ltt_buf, consumed_old);
47d7d576 410
411 if(err != 0) {
412 if(err == -EIO) {
8a9103df 413 dbg_printf("Reader has been pushed by the writer, last subbuffer corrupted.\n");
47d7d576 414 /* FIXME : we may delete the last written buffer if we wish. */
415 }
416 goto get_error;
417 }
418
419get_error:
420 return err;
421}
e8efa18d 422
a85b8f41 423/* This function is called by ltt_rw_init which has signals blocked */
700d350d 424static void ltt_usertrace_fast_daemon(struct ltt_trace_info *shared_trace_info,
4c992ad5 425 sigset_t oldset, pid_t l_traced_pid, pthread_t l_traced_tid)
700d350d 426{
427 struct sigaction act;
428 int ret;
77b31f39 429
a85b8f41 430 traced_pid = l_traced_pid;
4c992ad5 431 traced_tid = l_traced_tid;
e8efa18d 432
8a9103df 433 dbg_printf("LTT ltt_usertrace_fast_daemon : init is %d, pid is %lu, traced_pid is %lu, traced_tid is %lu\n",
4c992ad5 434 shared_trace_info->init, getpid(), traced_pid, traced_tid);
700d350d 435
e8efa18d 436 act.sa_handler = handler_sigusr1;
700d350d 437 act.sa_flags = 0;
438 sigemptyset(&(act.sa_mask));
439 sigaddset(&(act.sa_mask), SIGUSR1);
440 sigaction(SIGUSR1, &act, NULL);
e8efa18d 441
442 act.sa_handler = handler_sigusr2;
443 act.sa_flags = 0;
444 sigemptyset(&(act.sa_mask));
445 sigaddset(&(act.sa_mask), SIGUSR2);
446 sigaction(SIGUSR2, &act, NULL);
447
448 act.sa_handler = handler_sigalarm;
449 act.sa_flags = 0;
450 sigemptyset(&(act.sa_mask));
451 sigaddset(&(act.sa_mask), SIGALRM);
452 sigaction(SIGALRM, &act, NULL);
453
e8efa18d 454 alarm(3);
455
700d350d 456 while(1) {
c095a20d 457 ret = sigsuspend(&oldset);
458 if(ret != -1) {
459 perror("LTT Error in sigsuspend\n");
460 }
a85b8f41 461 if(traced_pid == 0) break; /* parent died */
e8efa18d 462 if(parent_exited) break;
8a9103df 463 dbg_printf("LTT Doing a buffer switch read. pid is : %lu\n", getpid());
7076f16d 464
47d7d576 465 do {
b402c055 466 ret = read_subbuffer(&shared_trace_info->channel.process, fd_process);
47d7d576 467 } while(ret == 0);
700d350d 468 }
32f2b04a 469 /* The parent thread is dead and we have finished with the buffer */
470
471 /* Buffer force switch (flush). Using FLUSH instead of ACTIVE because we know
472 * there is no writer. */
b402c055 473 flush_buffer(&shared_trace_info->channel.process, FORCE_FLUSH);
47d7d576 474 do {
b402c055 475 ret = read_subbuffer(&shared_trace_info->channel.process, fd_process);
47d7d576 476 } while(ret == 0);
477
e0cd021d 478 if(fd_process != -1)
479 close(fd_process);
77b31f39 480
85b94320 481 ret = sem_destroy(&shared_trace_info->channel.process.writer_sem);
482 if(ret < 0) {
483 perror("error in sem_destroy");
484 }
e8efa18d 485 munmap(shared_trace_info, sizeof(*shared_trace_info));
486
487 exit(0);
700d350d 488}
b09f3215 489
e8efa18d 490
491/* Reader-writer initialization */
492
/* Side of the fork() this process is on. Every process starts as a writer;
 * the child forked by ltt_rw_init() becomes the reader. */
enum ltt_process_role {
	LTT_ROLE_WRITER,
	LTT_ROLE_READER
};

static enum ltt_process_role role = LTT_ROLE_WRITER;
495
496
497void ltt_rw_init(void)
b09f3215 498{
700d350d 499 pid_t pid;
500 struct ltt_trace_info *shared_trace_info;
501 int ret;
502 sigset_t set, oldset;
a85b8f41 503 pid_t l_traced_pid = getpid();
4c992ad5 504 pid_t l_traced_tid = gettid();
700d350d 505
506 /* parent : create the shared memory map */
a85b8f41 507 shared_trace_info = mmap(0, sizeof(*thread_trace_info),
700d350d 508 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, 0, 0);
8a9103df 509 shared_trace_info->init=0;
510 shared_trace_info->filter=0;
511 shared_trace_info->daemon_id=0;
512 shared_trace_info->nesting=0;
b402c055 513 memset(&shared_trace_info->channel.process, 0,
514 sizeof(shared_trace_info->channel.process));
85b94320 515 //Need NPTL!
516 ret = sem_init(&shared_trace_info->channel.process.writer_sem, 1,
517 LTT_N_SUBBUFS);
518 if(ret < 0) {
519 perror("error in sem_init");
520 }
b402c055 521 shared_trace_info->channel.process.alloc_size = LTT_BUF_SIZE_PROCESS;
522 shared_trace_info->channel.process.subbuf_size = LTT_SUBBUF_SIZE_PROCESS;
523 shared_trace_info->channel.process.start =
524 shared_trace_info->channel.process_buf;
525 ltt_buffer_begin_callback(&shared_trace_info->channel.process,
5ffa9d14 526 ltt_get_timestamp(), 0);
527
a85b8f41 528 shared_trace_info->init = 1;
700d350d 529
530 /* Disable signals */
531 ret = sigfillset(&set);
532 if(ret) {
8a9103df 533 dbg_printf("LTT Error in sigfillset\n");
700d350d 534 }
535
700d350d 536 ret = pthread_sigmask(SIG_BLOCK, &set, &oldset);
537 if(ret) {
8a9103df 538 dbg_printf("LTT Error in pthread_sigmask\n");
700d350d 539 }
a85b8f41 540
700d350d 541 pid = fork();
542 if(pid > 0) {
543 /* Parent */
a85b8f41 544 shared_trace_info->daemon_id = pid;
545 thread_trace_info = shared_trace_info;
700d350d 546
547 /* Enable signals */
548 ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
549 if(ret) {
8a9103df 550 dbg_printf("LTT Error in pthread_sigmask\n");
700d350d 551 }
552 } else if(pid == 0) {
b5d612cb 553 pid_t sid;
700d350d 554 /* Child */
e8efa18d 555 role = LTT_ROLE_READER;
b5d612cb 556 sid = setsid();
b402c055 557 //Not a good idea to renice, unless futex wait eventually implement
558 //priority inheritence.
559 //ret = nice(1);
560 //if(ret < 0) {
561 // perror("Error in nice");
562 //}
b5d612cb 563 if(sid < 0) {
564 perror("Error setting sid");
565 }
77b31f39 566 ltt_usertrace_fast_daemon(shared_trace_info, oldset, l_traced_pid,
4c992ad5 567 l_traced_tid);
700d350d 568 /* Should never return */
569 exit(-1);
570 } else if(pid < 0) {
571 /* fork error */
a35eaa9c 572 perror("LTT Error in forking ltt-usertrace-fast");
700d350d 573 }
b09f3215 574}
575
/* Per-thread storage handed to _pthread_cleanup_push() by ltt_thread_init(). */
static __thread struct _pthread_cleanup_buffer cleanup_buffer;

/* Per-thread tracing setup : registers ltt_usertrace_fast_cleanup() as a
 * cleanup handler for the calling thread, then runs ltt_rw_init() to create
 * the thread's shared trace area and dump daemon.
 * NOTE(review): presumably meant to be called once by each new thread, the
 * handler running when that thread exits - confirm against the callers. */
void ltt_thread_init(void)
{
	_pthread_cleanup_push(&cleanup_buffer, ltt_usertrace_fast_cleanup, NULL);
	ltt_rw_init();
}
583
/* Constructor : sets up tracing for the thread that runs it by calling
 * ltt_rw_init(). */
void __attribute__((constructor)) __ltt_usertrace_fast_init(void)
{
	dbg_printf("LTT usertrace-fast init\n");

	ltt_rw_init();
}
590
591void __attribute__((destructor)) __ltt_usertrace_fast_fini(void)
592{
e8efa18d 593 if(role == LTT_ROLE_WRITER) {
8a9103df 594 dbg_printf("LTT usertrace-fast fini\n");
e8efa18d 595 ltt_usertrace_fast_cleanup(NULL);
596 }
b09f3215 597}
598
This page took 0.055445 seconds and 4 git commands to generate.