update syscalls
[lttv.git] / ltt-usertrace / ltt-usertrace-fast.c
CommitLineData
700d350d 1/* LTTng user-space "fast" library
2 *
3 * This daemon is spawned by each traced thread (to share the mmap).
4 *
5 * Its job is to dump periodically this buffer to disk (when it receives a
6 * SIGUSR1 from its parent).
7 *
8 * It uses the control information in the shared memory area (producer/consumer
9 * count).
10 *
11 * When the parent thread dies (yes, those thing may happen) ;) , this daemon
12 * will flush the last buffer and write it to disk.
13 *
14 * Supplement note for streaming : the daemon is responsible for flushing
15 * periodically the buffer if it is streaming data.
16 *
b09f3215 17 *
700d350d 18 * Notes :
19 * shm memory is typically limited to 4096 units (system wide limit SHMMNI in
20 * /proc/sys/kernel/shmmni). As it requires computation time upon creation, we
21 * do not use it : we will use a shared mmap() instead which is passed through
22 * the fork().
23 * MAP_SHARED mmap segment. Updated when msync or munmap are called.
24 * MAP_ANONYMOUS.
25 * Memory mapped by mmap() is preserved across fork(2), with the same
26 * attributes.
27 *
28 * Eventually, there will be two mode :
29 * * Slow thread spawn : a fork() is done for each new thread. If the process
30 * dies, the data is not lost.
31 * * Fast thread spawn : a pthread_create() is done by the application for each
32 * new thread.
a85b8f41 33 *
34 * We use a timer to check periodically if the parent died. I think it is less
35 * intrusive than a ptrace() on the parent, which would get every signal. The
36 * side effect of this is that we won't be notified if the parent does an
37 * exec(). In this case, we will just sit there until the parent exits.
38 *
39 *
b09f3215 40 * Copyright 2006 Mathieu Desnoyers
41 *
42 */
43
8a9103df 44#define inline inline __attribute__((always_inline))
45
32f2b04a 46#define _GNU_SOURCE
8b30e7bc 47#define LTT_TRACE
976db1b3 48#define LTT_TRACE_FAST
b09f3215 49#include <unistd.h>
50#include <stdlib.h>
51#include <stdio.h>
52#include <signal.h>
53#include <syscall.h>
54#include <features.h>
55#include <pthread.h>
56#include <malloc.h>
57#include <string.h>
700d350d 58#include <signal.h>
77b31f39 59#include <fcntl.h>
60#include <stdlib.h>
4359c2bb 61#include <errno.h>
77b31f39 62#include <sys/param.h>
47d7d576 63#include <sys/time.h>
4359c2bb 64#include <sys/types.h>
65#include <sys/wait.h>
66#include <sys/stat.h>
67#include <sys/mman.h>
5ecaa1ab 68#include <unistd.h>
69#include <sys/syscall.h>
77b31f39 70
d86395c3 71// included with hack for powerpc in ltt-usertrace.h #include <asm/atomic.h>
77b31f39 72#include <asm/timex.h> //for get_cycles()
b09f3215 73
976db1b3 74#include <ltt/ltt-usertrace.h>
b09f3215 75
5ecaa1ab 76#define gettid() syscall(__NR_gettid)
4359c2bb 77
8a9103df 78#ifdef LTT_SHOW_DEBUG
fcbe1ea8 79#define dbg_printf(...) printf(__VA_ARGS__)
8a9103df 80#else
81#define dbg_printf(...)
82#endif //LTT_SHOW_DEBUG
83
84
/* Forced sub-buffer switch modes :
 * FORCE_ACTIVE : switch and open a new sub-buffer (writers may still be
 *                active).
 * FORCE_FLUSH  : flush what is there only; used when no writer is left. */
enum force_switch_mode { FORCE_ACTIVE, FORCE_FLUSH };

/* Writer (the traced application) */

/* Per-thread pointer to the shared (mmap'd) trace control area;
 * NULL until ltt_rw_init() has run for this thread. */
__thread struct ltt_trace_info *thread_trace_info = NULL;
700d350d 90
e8efa18d 91void ltt_usertrace_fast_buffer_switch(void)
92{
a85b8f41 93 struct ltt_trace_info *tmp = thread_trace_info;
94 if(tmp)
95 kill(tmp->daemon_id, SIGUSR1);
e8efa18d 96}
700d350d 97
77b31f39 98/* The cleanup should never be called from a signal handler */
e8efa18d 99static void ltt_usertrace_fast_cleanup(void *arg)
b09f3215 100{
a85b8f41 101 struct ltt_trace_info *tmp = thread_trace_info;
102 if(tmp) {
103 thread_trace_info = NULL;
104 kill(tmp->daemon_id, SIGUSR2);
105 munmap(tmp, sizeof(*tmp));
106 }
700d350d 107}
b09f3215 108
e8efa18d 109/* Reader (the disk dumper daemon) */
700d350d 110
static pid_t traced_pid = 0;		/* pid of the traced parent; 0 once it died */
static pid_t traced_tid = 0;		/* tid of the traced thread */
static int parent_exited = 0;		/* set by the SIGUSR2 handler */
static int fd_process = -1;		/* lazily-opened output file; -1 = not open */
static char outfile_name[PATH_MAX];	/* "process-<identifier>" file name */
static char identifier_name[PATH_MAX];	/* "<tid>.<pid>.<cycles>" unique id */
700d350d 117
e8efa18d 118/* signal handling */
/* SIGUSR1 : the traced parent requests a buffer switch/dump.
 * Nothing to do here : the signal only has to interrupt the daemon's
 * sigsuspend() so the main loop drains the buffer. */
static void handler_sigusr1(int signo)
{
	dbg_printf("LTT Signal %d received : parent buffer switch.\n", signo);
}
123
/* SIGUSR2 : the traced parent announces its exit.
 * Flag it so the daemon loop performs a final flush and terminates. */
static void handler_sigusr2(int signo)
{
	dbg_printf("LTT Signal %d received : parent exited.\n", signo);
	parent_exited = 1;
}
129
130static void handler_sigalarm(int signo)
131{
8a9103df 132 dbg_printf("LTT Signal %d received\n", signo);
e8efa18d 133
a85b8f41 134 if(getppid() != traced_pid) {
e8efa18d 135 /* Parent died */
8a9103df 136 dbg_printf("LTT Parent %lu died, cleaning up\n", traced_pid);
a85b8f41 137 traced_pid = 0;
e8efa18d 138 }
139 alarm(3);
b09f3215 140}
141
47d7d576 142/* Do a buffer switch. Don't switch if buffer is completely empty */
/* Do a buffer switch. Don't switch if buffer is completely empty.
 *
 * Lock-free forced switch of the current sub-buffer, mirroring the writer's
 * reserve/commit protocol :
 *   1. cmpxchg loop on ltt_buf->offset to atomically close the old
 *      sub-buffer (and, in FORCE_ACTIVE mode, open a new one).
 *   2. In FORCE_ACTIVE mode, push the reader's consumed count if the
 *      writer caught up with it.
 *   3. End-callback + reserve-count adjustment on the old sub-buffer so the
 *      last committer delivers it; begin-callback + commit on the new one.
 * FORCE_FLUSH is only used when no writer is left (see daemon exit path). */
static void flush_buffer(struct ltt_buf *ltt_buf, enum force_switch_mode mode)
{
	uint64_t tsc;
	int offset_begin, offset_end, offset_old;
	int reserve_commit_diff;
	int consumed_old, consumed_new;
	int commit_count, reserve_count;
	int end_switch_old;

	do {
		offset_old = atomic_read(&ltt_buf->offset);
		offset_begin = offset_old;
		end_switch_old = 0;
		tsc = ltt_get_timestamp();
		if(tsc == 0) {
			/* Error in getting the timestamp : should not happen : it would
			 * mean we are called from an NMI during a write seqlock on xtime. */
			return;
		}

		if(SUBBUF_OFFSET(offset_begin, ltt_buf) != 0) {
			/* Old sub-buffer has data : align to its end and mark it
			 * for the end-switch path below. */
			offset_begin = SUBBUF_ALIGN(offset_begin, ltt_buf);
			end_switch_old = 1;
		} else {
			/* we do not have to switch : buffer is empty */
			return;
		}
		if(mode == FORCE_ACTIVE)
			offset_begin += ltt_subbuf_header_len(ltt_buf);
		/* Always begin_switch in FORCE_ACTIVE mode */

		/* Test new buffer integrity */
		reserve_commit_diff =
			atomic_read(
				&ltt_buf->reserve_count[SUBBUF_INDEX(offset_begin, ltt_buf)])
			- atomic_read(
				&ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
		if(reserve_commit_diff == 0) {
			/* Next buffer not corrupted. */
			if(mode == FORCE_ACTIVE
					&& (offset_begin-atomic_read(&ltt_buf->consumed))
					>= ltt_buf->alloc_size) {
				/* We do not overwrite non consumed buffers and we are full : ignore
				   switch while tracing is active. */
				return;
			}
		} else {
			/* Next subbuffer corrupted. Force pushing reader even in normal mode */
		}

		offset_end = offset_begin;
	} while(atomic_cmpxchg(&ltt_buf->offset, offset_old, offset_end)
			!= offset_old);


	if(mode == FORCE_ACTIVE) {
		/* Push the reader if necessary */
		do {
			consumed_old = atomic_read(&ltt_buf->consumed);
			/* If buffer is in overwrite mode, push the reader consumed count if
			   the write position has reached it and we are not at the first
			   iteration (don't push the reader farther than the writer).
			   This operation can be done concurrently by many writers in the
			   same buffer, the writer being at the fartest write position sub-buffer
			   index in the buffer being the one which will win this loop. */
			/* If the buffer is not in overwrite mode, pushing the reader only
			   happen if a sub-buffer is corrupted */
			if((SUBBUF_TRUNC(offset_end-1, ltt_buf)
					- SUBBUF_TRUNC(consumed_old, ltt_buf))
					>= ltt_buf->alloc_size)
				consumed_new = SUBBUF_ALIGN(consumed_old, ltt_buf);
			else {
				consumed_new = consumed_old;
				break;
			}
		} while(atomic_cmpxchg(&ltt_buf->consumed, consumed_old, consumed_new)
				!= consumed_old);

		if(consumed_old != consumed_new) {
			/* Reader pushed : we are the winner of the push, we can therefore
			   reequilibrate reserve and commit. Atomic increment of the commit
			   count permits other writers to play around with this variable
			   before us. We keep track of corrupted_subbuffers even in overwrite
			   mode :
			   we never want to write over a non completely committed sub-buffer :
			   possible causes : the buffer size is too low compared to the unordered
			   data input, or there is a writer who died between the reserve and the
			   commit. */
			if(reserve_commit_diff) {
				/* We have to alter the sub-buffer commit count : a sub-buffer is
				   corrupted */
				atomic_add(reserve_commit_diff,
					&ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
				atomic_inc(&ltt_buf->corrupted_subbuffers);
			}
		}
	}

	/* Always switch */

	if(end_switch_old) {
		/* old subbuffer */
		/* Concurrency safe because we are the last and only thread to alter this
		   sub-buffer. As long as it is not delivered and read, no other thread can
		   alter the offset, alter the reserve_count or call the
		   client_buffer_end_callback on this sub-buffer.
		   The only remaining threads could be the ones with pending commits. They
		   will have to do the deliver themself.
		   Not concurrency safe in overwrite mode. We detect corrupted subbuffers with
		   commit and reserve counts. We keep a corrupted sub-buffers count and push
		   the readers across these sub-buffers.
		   Not concurrency safe if a writer is stalled in a subbuffer and
		   another writer switches in, finding out it's corrupted. The result will be
		   than the old (uncommited) subbuffer will be declared corrupted, and that
		   the new subbuffer will be declared corrupted too because of the commit
		   count adjustment.
		   Offset old should never be 0. */
		ltt_buffer_end_callback(ltt_buf, tsc, offset_old,
				SUBBUF_INDEX((offset_old), ltt_buf));
		/* Setting this reserve_count will allow the sub-buffer to be delivered by
		   the last committer. */
		reserve_count = atomic_add_return((SUBBUF_OFFSET((offset_old-1),
						ltt_buf) + 1),
				&ltt_buf->reserve_count[SUBBUF_INDEX((offset_old),
						ltt_buf)]);
		if(reserve_count == atomic_read(
				&ltt_buf->commit_count[SUBBUF_INDEX((offset_old), ltt_buf)])) {
			ltt_deliver_callback(ltt_buf, SUBBUF_INDEX((offset_old), ltt_buf), NULL);
		}
	}

	if(mode == FORCE_ACTIVE) {
		/* New sub-buffer */
		/* This code can be executed unordered : writers may already have written
		   to the sub-buffer before this code gets executed, caution. */
		/* The commit makes sure that this code is executed before the deliver
		   of this sub-buffer */
		ltt_buffer_begin_callback(ltt_buf, tsc, SUBBUF_INDEX(offset_begin, ltt_buf));
		commit_count = atomic_add_return(ltt_subbuf_header_len(ltt_buf),
				&ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
		/* Check if the written buffer has to be delivered */
		if(commit_count == atomic_read(
				&ltt_buf->reserve_count[SUBBUF_INDEX(offset_begin, ltt_buf)])) {
			ltt_deliver_callback(ltt_buf, SUBBUF_INDEX(offset_begin, ltt_buf), NULL);
		}
	}

}
291
c095a20d 292
293static int open_output_files(void)
294{
295 int ret;
296 int fd;
297 /* Open output files */
298 umask(00000);
299 ret = mkdir(LTT_USERTRACE_ROOT, 0777);
300 if(ret < 0 && errno != EEXIST) {
301 perror("LTT Error in creating output (mkdir)");
302 exit(-1);
303 }
304 ret = chdir(LTT_USERTRACE_ROOT);
305 if(ret < 0) {
306 perror("LTT Error in creating output (chdir)");
307 exit(-1);
308 }
309 snprintf(identifier_name, PATH_MAX-1, "%lu.%lu.%llu",
310 traced_tid, traced_pid, get_cycles());
311 snprintf(outfile_name, PATH_MAX-1, "process-%s", identifier_name);
312
313#ifndef LTT_NULL_OUTPUT_TEST
314 fd = creat(outfile_name, 0644);
315#else
316 /* NULL test */
317 ret = symlink("/dev/null", outfile_name);
318 if(ret < 0) {
319 perror("error in symlink");
320 exit(-1);
321 }
322 fd = open(outfile_name, O_WRONLY);
323 if(fd_process < 0) {
324 perror("Error in open");
325 exit(-1);
326 }
327#endif //LTT_NULL_OUTPUT_TEST
328 return fd;
329}
330
47d7d576 331static inline int ltt_buffer_get(struct ltt_buf *ltt_buf,
332 unsigned int *offset)
333{
334 unsigned int consumed_old, consumed_idx;
335 consumed_old = atomic_read(&ltt_buf->consumed);
336 consumed_idx = SUBBUF_INDEX(consumed_old, ltt_buf);
337
338 if(atomic_read(&ltt_buf->commit_count[consumed_idx])
339 != atomic_read(&ltt_buf->reserve_count[consumed_idx])) {
340 return -EAGAIN;
341 }
342 if((SUBBUF_TRUNC(atomic_read(&ltt_buf->offset), ltt_buf)
343 -SUBBUF_TRUNC(consumed_old, ltt_buf)) == 0) {
344 return -EAGAIN;
345 }
346
347 *offset = consumed_old;
348
349 return 0;
350}
351
352static inline int ltt_buffer_put(struct ltt_buf *ltt_buf,
353 unsigned int offset)
354{
355 unsigned int consumed_old, consumed_new;
356 int ret;
357
358 consumed_old = offset;
359 consumed_new = SUBBUF_ALIGN(consumed_old, ltt_buf);
360 if(atomic_cmpxchg(&ltt_buf->consumed, consumed_old, consumed_new)
361 != consumed_old) {
362 /* We have been pushed by the writer : the last buffer read _is_
363 * corrupted!
364 * It can also happen if this is a buffer we never got. */
365 return -EIO;
366 } else {
e0cd021d 367 if(traced_pid == 0 || parent_exited) return 0;
368
85b94320 369 ret = sem_post(&ltt_buf->writer_sem);
370 if(ret < 0) {
371 printf("error in sem_post");
47d7d576 372 }
373 }
374}
375
/* Consume exactly one sub-buffer from ltt_buf and write it to fd.
 * Returns 0 when a sub-buffer was written (caller loops to drain),
 * -EAGAIN when nothing is ready, -EIO when the writer pushed the reader
 * (last sub-buffer read is corrupted).
 * The output file is opened lazily on the first successful get. */
static int read_subbuffer(struct ltt_buf *ltt_buf, int fd)
{
	unsigned int consumed_old;
	int err;
	dbg_printf("LTT read buffer\n");


	err = ltt_buffer_get(ltt_buf, &consumed_old);
	if(err != 0) {
		if(err != -EAGAIN) dbg_printf("LTT Reserving sub buffer failed\n");
		goto get_error;
	}
	/* Lazy open : also refresh the local fd used for this write. */
	if(fd_process == -1) {
		fd_process = fd = open_output_files();
	}

	/* The mask wraps consumed_old inside the buffer; assumes alloc_size
	 * is a power of two — TODO confirm against buffer setup. */
	err = TEMP_FAILURE_RETRY(write(fd,
			ltt_buf->start
				+ (consumed_old & ((ltt_buf->alloc_size)-1)),
			ltt_buf->subbuf_size));

	if(err < 0) {
		perror("Error in writing to file");
		goto write_error;
	}
#if 0
	err = fsync(pair->trace);
	if(err < 0) {
		ret = errno;
		perror("Error in writing to file");
		goto write_error;
	}
#endif //0
	/* Deliberate fallthrough : the sub-buffer must be released whether or
	 * not the write succeeded. */
write_error:
	err = ltt_buffer_put(ltt_buf, consumed_old);

	if(err != 0) {
		if(err == -EIO) {
			dbg_printf("Reader has been pushed by the writer, last subbuffer corrupted.\n");
			/* FIXME : we may delete the last written buffer if we wish. */
		}
		goto get_error;
	}

get_error:
	return err;
}
e8efa18d 423
/* This function is called by ltt_rw_init which has signals blocked */
/* Main loop of the disk-dumper daemon (the forked child).
 * Installs handlers for SIGUSR1 (parent requests buffer switch), SIGUSR2
 * (parent exiting) and SIGALRM (periodic parent-liveness check), then
 * sleeps in sigsuspend() and drains the buffer after each signal.
 * When the parent is gone, force-flushes, drains once more, closes the
 * output file, destroys the writer semaphore, unmaps and exits.
 * Never returns. */
static void ltt_usertrace_fast_daemon(struct ltt_trace_info *shared_trace_info,
		sigset_t oldset, pid_t l_traced_pid, pthread_t l_traced_tid)
{
	struct sigaction act;
	int ret;

	traced_pid = l_traced_pid;
	traced_tid = l_traced_tid;

	/* NOTE(review): "%lu" does not match the int/pid_t arguments below —
	 * only compiled in under LTT_SHOW_DEBUG; verify before enabling. */
	dbg_printf("LTT ltt_usertrace_fast_daemon : init is %d, pid is %lu, traced_pid is %lu, traced_tid is %lu\n",
			shared_trace_info->init, getpid(), traced_pid, traced_tid);

	/* SIGUSR1 : buffer switch request from the parent. */
	act.sa_handler = handler_sigusr1;
	act.sa_flags = 0;
	sigemptyset(&(act.sa_mask));
	sigaddset(&(act.sa_mask), SIGUSR1);
	sigaction(SIGUSR1, &act, NULL);

	/* SIGUSR2 : parent announces its exit. */
	act.sa_handler = handler_sigusr2;
	act.sa_flags = 0;
	sigemptyset(&(act.sa_mask));
	sigaddset(&(act.sa_mask), SIGUSR2);
	sigaction(SIGUSR2, &act, NULL);

	/* SIGALRM : periodic check that the parent is still alive. */
	act.sa_handler = handler_sigalarm;
	act.sa_flags = 0;
	sigemptyset(&(act.sa_mask));
	sigaddset(&(act.sa_mask), SIGALRM);
	sigaction(SIGALRM, &act, NULL);

	alarm(3);

	while(1) {
		/* Atomically restore the pre-block signal mask and wait. */
		ret = sigsuspend(&oldset);
		if(ret != -1) {
			perror("LTT Error in sigsuspend\n");
		}
		if(traced_pid == 0) break; /* parent died */
		if(parent_exited) break;
		dbg_printf("LTT Doing a buffer switch read. pid is : %lu\n", getpid());

		/* Drain every complete sub-buffer. */
		do {
			ret = read_subbuffer(&shared_trace_info->channel.process, fd_process);
		} while(ret == 0);
	}
	/* The parent thread is dead and we have finished with the buffer */

	/* Buffer force switch (flush). Using FLUSH instead of ACTIVE because we know
	 * there is no writer. */
	flush_buffer(&shared_trace_info->channel.process, FORCE_FLUSH);
	do {
		ret = read_subbuffer(&shared_trace_info->channel.process, fd_process);
	} while(ret == 0);

	if(fd_process != -1)
		close(fd_process);

	ret = sem_destroy(&shared_trace_info->channel.process.writer_sem);
	if(ret < 0) {
		perror("error in sem_destroy");
	}
	munmap(shared_trace_info, sizeof(*shared_trace_info));

	exit(0);
}
b09f3215 490
e8efa18d 491
492/* Reader-writer initialization */
493
/* Role of this process after the fork in ltt_rw_init() : the traced
 * application is the writer, the spawned daemon is the reader.  Used by
 * the destructor to flush only on the writer side. */
static enum ltt_process_role { LTT_ROLE_WRITER, LTT_ROLE_READER }
	role = LTT_ROLE_WRITER;
496
497
498void ltt_rw_init(void)
b09f3215 499{
700d350d 500 pid_t pid;
501 struct ltt_trace_info *shared_trace_info;
502 int ret;
503 sigset_t set, oldset;
a85b8f41 504 pid_t l_traced_pid = getpid();
4c992ad5 505 pid_t l_traced_tid = gettid();
700d350d 506
507 /* parent : create the shared memory map */
a85b8f41 508 shared_trace_info = mmap(0, sizeof(*thread_trace_info),
700d350d 509 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, 0, 0);
8a9103df 510 shared_trace_info->init=0;
511 shared_trace_info->filter=0;
512 shared_trace_info->daemon_id=0;
513 shared_trace_info->nesting=0;
b402c055 514 memset(&shared_trace_info->channel.process, 0,
515 sizeof(shared_trace_info->channel.process));
85b94320 516 //Need NPTL!
517 ret = sem_init(&shared_trace_info->channel.process.writer_sem, 1,
518 LTT_N_SUBBUFS);
519 if(ret < 0) {
520 perror("error in sem_init");
521 }
b402c055 522 shared_trace_info->channel.process.alloc_size = LTT_BUF_SIZE_PROCESS;
523 shared_trace_info->channel.process.subbuf_size = LTT_SUBBUF_SIZE_PROCESS;
524 shared_trace_info->channel.process.start =
525 shared_trace_info->channel.process_buf;
526 ltt_buffer_begin_callback(&shared_trace_info->channel.process,
5ffa9d14 527 ltt_get_timestamp(), 0);
528
a85b8f41 529 shared_trace_info->init = 1;
700d350d 530
531 /* Disable signals */
532 ret = sigfillset(&set);
533 if(ret) {
8a9103df 534 dbg_printf("LTT Error in sigfillset\n");
700d350d 535 }
536
700d350d 537 ret = pthread_sigmask(SIG_BLOCK, &set, &oldset);
538 if(ret) {
8a9103df 539 dbg_printf("LTT Error in pthread_sigmask\n");
700d350d 540 }
a85b8f41 541
700d350d 542 pid = fork();
543 if(pid > 0) {
544 /* Parent */
a85b8f41 545 shared_trace_info->daemon_id = pid;
546 thread_trace_info = shared_trace_info;
700d350d 547
548 /* Enable signals */
549 ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
550 if(ret) {
8a9103df 551 dbg_printf("LTT Error in pthread_sigmask\n");
700d350d 552 }
553 } else if(pid == 0) {
b5d612cb 554 pid_t sid;
700d350d 555 /* Child */
e8efa18d 556 role = LTT_ROLE_READER;
b5d612cb 557 sid = setsid();
b402c055 558 //Not a good idea to renice, unless futex wait eventually implement
559 //priority inheritence.
560 //ret = nice(1);
561 //if(ret < 0) {
562 // perror("Error in nice");
563 //}
b5d612cb 564 if(sid < 0) {
565 perror("Error setting sid");
566 }
77b31f39 567 ltt_usertrace_fast_daemon(shared_trace_info, oldset, l_traced_pid,
4c992ad5 568 l_traced_tid);
700d350d 569 /* Should never return */
570 exit(-1);
571 } else if(pid < 0) {
572 /* fork error */
a35eaa9c 573 perror("LTT Error in forking ltt-usertrace-fast");
700d350d 574 }
b09f3215 575}
576
/* Per-thread context for the pthread cleanup handler registered below. */
static __thread struct _pthread_cleanup_buffer cleanup_buffer;

/* Entry point for a traced pthread : register the cleanup handler (so the
 * daemon is notified when the thread exits) and spawn the daemon.
 * NOTE(review): _pthread_cleanup_push is a glibc-internal interface —
 * confirm it is available on the target libc. */
void ltt_thread_init(void)
{
	_pthread_cleanup_push(&cleanup_buffer, ltt_usertrace_fast_cleanup, NULL);
	ltt_rw_init();
}
584
/* Library constructor : starts tracing for the main thread as soon as the
 * library is loaded (threads created later go through ltt_thread_init). */
void __attribute__((constructor)) __ltt_usertrace_fast_init(void)
{
	dbg_printf("LTT usertrace-fast init\n");

	ltt_rw_init();
}
591
592void __attribute__((destructor)) __ltt_usertrace_fast_fini(void)
593{
e8efa18d 594 if(role == LTT_ROLE_WRITER) {
8a9103df 595 dbg_printf("LTT usertrace-fast fini\n");
e8efa18d 596 ltt_usertrace_fast_cleanup(NULL);
597 }
b09f3215 598}
599
This page took 0.05265 seconds and 4 git commands to generate.