update QUICKSTART
[lttv.git] / ltt-usertrace / ltt-usertrace-fast.c
CommitLineData
/* LTTng user-space "fast" library
 *
 * This daemon is spawned by each traced thread (to share the mmap).
 *
 * Its job is to periodically dump this buffer to disk (when it receives a
 * SIGUSR1 from its parent).
 *
 * It uses the control information in the shared memory area (producer/consumer
 * count).
 *
 * When the parent thread dies (yes, those things may happen) ;) , this daemon
 * will flush the last buffer and write it to disk.
 *
 * Supplementary note for streaming : the daemon is responsible for
 * periodically flushing the buffer if it is streaming data.
 *
 *
 * Notes :
 * shm memory is typically limited to 4096 units (system wide limit SHMMNI in
 * /proc/sys/kernel/shmmni). As it requires computation time upon creation, we
 * do not use it : we will use a shared mmap() instead which is passed through
 * the fork().
 * MAP_SHARED mmap segment. Updated when msync or munmap are called.
 * MAP_ANONYMOUS.
 * Memory mapped by mmap() is preserved across fork(2), with the same
 * attributes.
 *
 * Eventually, there will be two modes :
 * * Slow thread spawn : a fork() is done for each new thread. If the process
 *   dies, the data is not lost.
 * * Fast thread spawn : a pthread_create() is done by the application for each
 *   new thread.
 *
 * We use a timer to check periodically if the parent died. I think it is less
 * intrusive than a ptrace() on the parent, which would get every signal. The
 * side effect of this is that we won't be notified if the parent does an
 * exec(). In this case, we will just sit there until the parent exits.
 *
 *
 * Copyright 2006 Mathieu Desnoyers
 *
 */
43
8a9103df 44#define inline inline __attribute__((always_inline))
45
32f2b04a 46#define _GNU_SOURCE
8b30e7bc 47#define LTT_TRACE
976db1b3 48#define LTT_TRACE_FAST
b09f3215 49#include <unistd.h>
50#include <stdlib.h>
51#include <stdio.h>
52#include <signal.h>
53#include <syscall.h>
54#include <features.h>
55#include <pthread.h>
56#include <malloc.h>
57#include <string.h>
700d350d 58#include <signal.h>
77b31f39 59#include <fcntl.h>
60#include <stdlib.h>
4359c2bb 61#include <errno.h>
77b31f39 62#include <sys/param.h>
47d7d576 63#include <sys/time.h>
4359c2bb 64#include <sys/types.h>
65#include <sys/wait.h>
66#include <sys/stat.h>
67#include <sys/mman.h>
5ecaa1ab 68#include <unistd.h>
69#include <sys/syscall.h>
77b31f39 70
976db1b3 71#include <ltt/ltt-usertrace.h>
b09f3215 72
/* Kernel thread id of the caller, obtained through the raw gettid system
 * call. syscall() returns a long : the result is cast to pid_t and the whole
 * expansion is parenthesized so it can be used safely inside expressions. */
#define gettid() ((pid_t)syscall(__NR_gettid))
4359c2bb 74
/* Debug trace helper : forwards to printf() when LTT_SHOW_DEBUG is defined.
 * Otherwise it expands to an empty statement that never evaluates its
 * arguments, so "if(cond) dbg_printf(...);" keeps a well-formed body. */
#ifdef LTT_SHOW_DEBUG
#define dbg_printf(...) printf(__VA_ARGS__)
#else
#define dbg_printf(...) do { } while (0)
#endif //LTT_SHOW_DEBUG
80
81
/* How flush_buffer() must perform the sub-buffer switch. */
enum force_switch_mode {
	FORCE_ACTIVE,	/* close the current sub-buffer and open the next one */
	FORCE_FLUSH	/* only close the current sub-buffer */
};
b09f3215 83
/* Writer (the traced application) */

/* Per-thread pointer to the trace information mapping. It stays NULL until
 * ltt_rw_init() has attached a mapping to the calling thread, and it is reset
 * to NULL by the cleanup routine. */
__thread struct ltt_trace_info *thread_trace_info = NULL;
700d350d 87
e8efa18d 88void ltt_usertrace_fast_buffer_switch(void)
89{
a85b8f41 90 struct ltt_trace_info *tmp = thread_trace_info;
91 if(tmp)
92 kill(tmp->daemon_id, SIGUSR1);
e8efa18d 93}
700d350d 94
77b31f39 95/* The cleanup should never be called from a signal handler */
e8efa18d 96static void ltt_usertrace_fast_cleanup(void *arg)
b09f3215 97{
a85b8f41 98 struct ltt_trace_info *tmp = thread_trace_info;
99 if(tmp) {
100 thread_trace_info = NULL;
101 kill(tmp->daemon_id, SIGUSR2);
102 munmap(tmp, sizeof(*tmp));
103 }
700d350d 104}
b09f3215 105
/* Reader (the disk dumper daemon) */

/* traced_pid and parent_exited are written from the signal handlers and
 * polled by the daemon main loop : they are volatile (and parent_exited is a
 * sig_atomic_t) so the loop always observes the handlers' stores.
 * traced_pid is reset to 0 once the parent is found dead. */
static volatile pid_t traced_pid = 0;
static pid_t traced_tid = 0;
static volatile sig_atomic_t parent_exited = 0;
/* Output file descriptor; -1 until the output file has been opened. */
static int fd_process = -1;
static char outfile_name[PATH_MAX];
static char identifier_name[PATH_MAX];
700d350d 114
/* signal handling */

/* SIGUSR1 : the traced thread asks for a buffer read. The handler itself only
 * emits a debug trace. */
static void handler_sigusr1(int signo)
{
	dbg_printf("LTT Signal %d received : parent buffer switch.\n", signo);
}
120
121static void handler_sigusr2(int signo)
122{
8a9103df 123 dbg_printf("LTT Signal %d received : parent exited.\n", signo);
e8efa18d 124 parent_exited = 1;
125}
126
127static void handler_sigalarm(int signo)
128{
8a9103df 129 dbg_printf("LTT Signal %d received\n", signo);
e8efa18d 130
a85b8f41 131 if(getppid() != traced_pid) {
e8efa18d 132 /* Parent died */
8a9103df 133 dbg_printf("LTT Parent %lu died, cleaning up\n", traced_pid);
a85b8f41 134 traced_pid = 0;
e8efa18d 135 }
136 alarm(3);
b09f3215 137}
138
47d7d576 139/* Do a buffer switch. Don't switch if buffer is completely empty */
32f2b04a 140static void flush_buffer(struct ltt_buf *ltt_buf, enum force_switch_mode mode)
47d7d576 141{
32f2b04a 142 uint64_t tsc;
143 int offset_begin, offset_end, offset_old;
144 int reserve_commit_diff;
145 int consumed_old, consumed_new;
146 int commit_count, reserve_count;
147 int end_switch_old;
47d7d576 148
32f2b04a 149 do {
150 offset_old = atomic_read(&ltt_buf->offset);
151 offset_begin = offset_old;
152 end_switch_old = 0;
153 tsc = ltt_get_timestamp();
154 if(tsc == 0) {
155 /* Error in getting the timestamp : should not happen : it would
156 * mean we are called from an NMI during a write seqlock on xtime. */
157 return;
158 }
159
160 if(SUBBUF_OFFSET(offset_begin, ltt_buf) != 0) {
161 offset_begin = SUBBUF_ALIGN(offset_begin, ltt_buf);
162 end_switch_old = 1;
163 } else {
164 /* we do not have to switch : buffer is empty */
165 return;
166 }
167 if(mode == FORCE_ACTIVE)
168 offset_begin += ltt_subbuf_header_len(ltt_buf);
169 /* Always begin_switch in FORCE_ACTIVE mode */
170
171 /* Test new buffer integrity */
172 reserve_commit_diff =
173 atomic_read(
174 &ltt_buf->reserve_count[SUBBUF_INDEX(offset_begin, ltt_buf)])
175 - atomic_read(
176 &ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
177 if(reserve_commit_diff == 0) {
178 /* Next buffer not corrupted. */
179 if(mode == FORCE_ACTIVE
180 && (offset_begin-atomic_read(&ltt_buf->consumed))
181 >= ltt_buf->alloc_size) {
182 /* We do not overwrite non consumed buffers and we are full : ignore
183 switch while tracing is active. */
184 return;
185 }
186 } else {
187 /* Next subbuffer corrupted. Force pushing reader even in normal mode */
188 }
189
190 offset_end = offset_begin;
191 } while(atomic_cmpxchg(&ltt_buf->offset, offset_old, offset_end)
192 != offset_old);
193
194
195 if(mode == FORCE_ACTIVE) {
196 /* Push the reader if necessary */
197 do {
198 consumed_old = atomic_read(&ltt_buf->consumed);
199 /* If buffer is in overwrite mode, push the reader consumed count if
200 the write position has reached it and we are not at the first
201 iteration (don't push the reader farther than the writer).
202 This operation can be done concurrently by many writers in the
203 same buffer, the writer being at the fartest write position sub-buffer
204 index in the buffer being the one which will win this loop. */
205 /* If the buffer is not in overwrite mode, pushing the reader only
206 happen if a sub-buffer is corrupted */
680b9daa 207 if((SUBBUF_TRUNC(offset_end-1, ltt_buf)
32f2b04a 208 - SUBBUF_TRUNC(consumed_old, ltt_buf))
209 >= ltt_buf->alloc_size)
210 consumed_new = SUBBUF_ALIGN(consumed_old, ltt_buf);
211 else {
212 consumed_new = consumed_old;
213 break;
214 }
215 } while(atomic_cmpxchg(&ltt_buf->consumed, consumed_old, consumed_new)
216 != consumed_old);
217
218 if(consumed_old != consumed_new) {
219 /* Reader pushed : we are the winner of the push, we can therefore
220 reequilibrate reserve and commit. Atomic increment of the commit
221 count permits other writers to play around with this variable
222 before us. We keep track of corrupted_subbuffers even in overwrite
223 mode :
224 we never want to write over a non completely committed sub-buffer :
225 possible causes : the buffer size is too low compared to the unordered
226 data input, or there is a writer who died between the reserve and the
227 commit. */
228 if(reserve_commit_diff) {
229 /* We have to alter the sub-buffer commit count : a sub-buffer is
230 corrupted */
231 atomic_add(reserve_commit_diff,
232 &ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
233 atomic_inc(&ltt_buf->corrupted_subbuffers);
234 }
235 }
236 }
237
238 /* Always switch */
239
240 if(end_switch_old) {
241 /* old subbuffer */
242 /* Concurrency safe because we are the last and only thread to alter this
243 sub-buffer. As long as it is not delivered and read, no other thread can
244 alter the offset, alter the reserve_count or call the
245 client_buffer_end_callback on this sub-buffer.
246 The only remaining threads could be the ones with pending commits. They
247 will have to do the deliver themself.
248 Not concurrency safe in overwrite mode. We detect corrupted subbuffers with
249 commit and reserve counts. We keep a corrupted sub-buffers count and push
250 the readers across these sub-buffers.
251 Not concurrency safe if a writer is stalled in a subbuffer and
252 another writer switches in, finding out it's corrupted. The result will be
253 than the old (uncommited) subbuffer will be declared corrupted, and that
254 the new subbuffer will be declared corrupted too because of the commit
255 count adjustment.
256 Offset old should never be 0. */
257 ltt_buffer_end_callback(ltt_buf, tsc, offset_old,
258 SUBBUF_INDEX((offset_old), ltt_buf));
259 /* Setting this reserve_count will allow the sub-buffer to be delivered by
260 the last committer. */
261 reserve_count = atomic_add_return((SUBBUF_OFFSET((offset_old-1),
262 ltt_buf) + 1),
263 &ltt_buf->reserve_count[SUBBUF_INDEX((offset_old),
264 ltt_buf)]);
265 if(reserve_count == atomic_read(
266 &ltt_buf->commit_count[SUBBUF_INDEX((offset_old), ltt_buf)])) {
267 ltt_deliver_callback(ltt_buf, SUBBUF_INDEX((offset_old), ltt_buf), NULL);
268 }
269 }
270
271 if(mode == FORCE_ACTIVE) {
272 /* New sub-buffer */
273 /* This code can be executed unordered : writers may already have written
274 to the sub-buffer before this code gets executed, caution. */
275 /* The commit makes sure that this code is executed before the deliver
276 of this sub-buffer */
277 ltt_buffer_begin_callback(ltt_buf, tsc, SUBBUF_INDEX(offset_begin, ltt_buf));
278 commit_count = atomic_add_return(ltt_subbuf_header_len(ltt_buf),
279 &ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
280 /* Check if the written buffer has to be delivered */
281 if(commit_count == atomic_read(
282 &ltt_buf->reserve_count[SUBBUF_INDEX(offset_begin, ltt_buf)])) {
283 ltt_deliver_callback(ltt_buf, SUBBUF_INDEX(offset_begin, ltt_buf), NULL);
284 }
285 }
47d7d576 286
287}
288
c095a20d 289
290static int open_output_files(void)
291{
292 int ret;
293 int fd;
294 /* Open output files */
295 umask(00000);
296 ret = mkdir(LTT_USERTRACE_ROOT, 0777);
297 if(ret < 0 && errno != EEXIST) {
298 perror("LTT Error in creating output (mkdir)");
299 exit(-1);
300 }
301 ret = chdir(LTT_USERTRACE_ROOT);
302 if(ret < 0) {
303 perror("LTT Error in creating output (chdir)");
304 exit(-1);
305 }
306 snprintf(identifier_name, PATH_MAX-1, "%lu.%lu.%llu",
307 traced_tid, traced_pid, get_cycles());
308 snprintf(outfile_name, PATH_MAX-1, "process-%s", identifier_name);
309
310#ifndef LTT_NULL_OUTPUT_TEST
311 fd = creat(outfile_name, 0644);
312#else
313 /* NULL test */
314 ret = symlink("/dev/null", outfile_name);
315 if(ret < 0) {
316 perror("error in symlink");
317 exit(-1);
318 }
319 fd = open(outfile_name, O_WRONLY);
320 if(fd_process < 0) {
321 perror("Error in open");
322 exit(-1);
323 }
324#endif //LTT_NULL_OUTPUT_TEST
325 return fd;
326}
327
47d7d576 328static inline int ltt_buffer_get(struct ltt_buf *ltt_buf,
329 unsigned int *offset)
330{
331 unsigned int consumed_old, consumed_idx;
332 consumed_old = atomic_read(&ltt_buf->consumed);
333 consumed_idx = SUBBUF_INDEX(consumed_old, ltt_buf);
334
335 if(atomic_read(&ltt_buf->commit_count[consumed_idx])
336 != atomic_read(&ltt_buf->reserve_count[consumed_idx])) {
337 return -EAGAIN;
338 }
339 if((SUBBUF_TRUNC(atomic_read(&ltt_buf->offset), ltt_buf)
340 -SUBBUF_TRUNC(consumed_old, ltt_buf)) == 0) {
341 return -EAGAIN;
342 }
343
344 *offset = consumed_old;
345
346 return 0;
347}
348
349static inline int ltt_buffer_put(struct ltt_buf *ltt_buf,
350 unsigned int offset)
351{
352 unsigned int consumed_old, consumed_new;
353 int ret;
354
355 consumed_old = offset;
356 consumed_new = SUBBUF_ALIGN(consumed_old, ltt_buf);
357 if(atomic_cmpxchg(&ltt_buf->consumed, consumed_old, consumed_new)
358 != consumed_old) {
359 /* We have been pushed by the writer : the last buffer read _is_
360 * corrupted!
361 * It can also happen if this is a buffer we never got. */
362 return -EIO;
363 } else {
e0cd021d 364 if(traced_pid == 0 || parent_exited) return 0;
365
85b94320 366 ret = sem_post(&ltt_buf->writer_sem);
367 if(ret < 0) {
368 printf("error in sem_post");
47d7d576 369 }
370 }
371}
372
47d7d576 373static int read_subbuffer(struct ltt_buf *ltt_buf, int fd)
374{
32f2b04a 375 unsigned int consumed_old;
47d7d576 376 int err;
8a9103df 377 dbg_printf("LTT read buffer\n");
47d7d576 378
379
32f2b04a 380 err = ltt_buffer_get(ltt_buf, &consumed_old);
5ffa9d14 381 if(err != 0) {
8a9103df 382 if(err != -EAGAIN) dbg_printf("LTT Reserving sub buffer failed\n");
47d7d576 383 goto get_error;
384 }
c095a20d 385 if(fd_process == -1) {
386 fd_process = fd = open_output_files();
387 }
47d7d576 388
389 err = TEMP_FAILURE_RETRY(write(fd,
390 ltt_buf->start
391 + (consumed_old & ((ltt_buf->alloc_size)-1)),
392 ltt_buf->subbuf_size));
393
394 if(err < 0) {
395 perror("Error in writing to file");
396 goto write_error;
397 }
398#if 0
399 err = fsync(pair->trace);
400 if(err < 0) {
401 ret = errno;
402 perror("Error in writing to file");
403 goto write_error;
404 }
405#endif //0
406write_error:
32f2b04a 407 err = ltt_buffer_put(ltt_buf, consumed_old);
47d7d576 408
409 if(err != 0) {
410 if(err == -EIO) {
8a9103df 411 dbg_printf("Reader has been pushed by the writer, last subbuffer corrupted.\n");
47d7d576 412 /* FIXME : we may delete the last written buffer if we wish. */
413 }
414 goto get_error;
415 }
416
417get_error:
418 return err;
419}
e8efa18d 420
a85b8f41 421/* This function is called by ltt_rw_init which has signals blocked */
700d350d 422static void ltt_usertrace_fast_daemon(struct ltt_trace_info *shared_trace_info,
4c992ad5 423 sigset_t oldset, pid_t l_traced_pid, pthread_t l_traced_tid)
700d350d 424{
425 struct sigaction act;
426 int ret;
77b31f39 427
a85b8f41 428 traced_pid = l_traced_pid;
4c992ad5 429 traced_tid = l_traced_tid;
e8efa18d 430
8a9103df 431 dbg_printf("LTT ltt_usertrace_fast_daemon : init is %d, pid is %lu, traced_pid is %lu, traced_tid is %lu\n",
4c992ad5 432 shared_trace_info->init, getpid(), traced_pid, traced_tid);
700d350d 433
e8efa18d 434 act.sa_handler = handler_sigusr1;
700d350d 435 act.sa_flags = 0;
436 sigemptyset(&(act.sa_mask));
437 sigaddset(&(act.sa_mask), SIGUSR1);
438 sigaction(SIGUSR1, &act, NULL);
e8efa18d 439
440 act.sa_handler = handler_sigusr2;
441 act.sa_flags = 0;
442 sigemptyset(&(act.sa_mask));
443 sigaddset(&(act.sa_mask), SIGUSR2);
444 sigaction(SIGUSR2, &act, NULL);
445
446 act.sa_handler = handler_sigalarm;
447 act.sa_flags = 0;
448 sigemptyset(&(act.sa_mask));
449 sigaddset(&(act.sa_mask), SIGALRM);
450 sigaction(SIGALRM, &act, NULL);
451
e8efa18d 452 alarm(3);
453
700d350d 454 while(1) {
c095a20d 455 ret = sigsuspend(&oldset);
456 if(ret != -1) {
457 perror("LTT Error in sigsuspend\n");
458 }
a85b8f41 459 if(traced_pid == 0) break; /* parent died */
e8efa18d 460 if(parent_exited) break;
8a9103df 461 dbg_printf("LTT Doing a buffer switch read. pid is : %lu\n", getpid());
7076f16d 462
47d7d576 463 do {
b402c055 464 ret = read_subbuffer(&shared_trace_info->channel.process, fd_process);
47d7d576 465 } while(ret == 0);
700d350d 466 }
32f2b04a 467 /* The parent thread is dead and we have finished with the buffer */
468
469 /* Buffer force switch (flush). Using FLUSH instead of ACTIVE because we know
470 * there is no writer. */
b402c055 471 flush_buffer(&shared_trace_info->channel.process, FORCE_FLUSH);
47d7d576 472 do {
b402c055 473 ret = read_subbuffer(&shared_trace_info->channel.process, fd_process);
47d7d576 474 } while(ret == 0);
475
e0cd021d 476 if(fd_process != -1)
477 close(fd_process);
77b31f39 478
85b94320 479 ret = sem_destroy(&shared_trace_info->channel.process.writer_sem);
480 if(ret < 0) {
481 perror("error in sem_destroy");
482 }
e8efa18d 483 munmap(shared_trace_info, sizeof(*shared_trace_info));
484
485 exit(0);
700d350d 486}
b09f3215 487
e8efa18d 488
/* Reader-writer initialization */

/* Role of the current process : the traced application is the writer, the
 * forked disk dumper daemon is the reader. */
enum ltt_process_role {
	LTT_ROLE_WRITER,
	LTT_ROLE_READER
};

static enum ltt_process_role role = LTT_ROLE_WRITER;
493
494
495void ltt_rw_init(void)
b09f3215 496{
700d350d 497 pid_t pid;
498 struct ltt_trace_info *shared_trace_info;
499 int ret;
500 sigset_t set, oldset;
a85b8f41 501 pid_t l_traced_pid = getpid();
4c992ad5 502 pid_t l_traced_tid = gettid();
700d350d 503
504 /* parent : create the shared memory map */
a85b8f41 505 shared_trace_info = mmap(0, sizeof(*thread_trace_info),
700d350d 506 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, 0, 0);
8a9103df 507 shared_trace_info->init=0;
508 shared_trace_info->filter=0;
509 shared_trace_info->daemon_id=0;
510 shared_trace_info->nesting=0;
b402c055 511 memset(&shared_trace_info->channel.process, 0,
512 sizeof(shared_trace_info->channel.process));
85b94320 513 //Need NPTL!
514 ret = sem_init(&shared_trace_info->channel.process.writer_sem, 1,
515 LTT_N_SUBBUFS);
516 if(ret < 0) {
517 perror("error in sem_init");
518 }
b402c055 519 shared_trace_info->channel.process.alloc_size = LTT_BUF_SIZE_PROCESS;
520 shared_trace_info->channel.process.subbuf_size = LTT_SUBBUF_SIZE_PROCESS;
521 shared_trace_info->channel.process.start =
522 shared_trace_info->channel.process_buf;
523 ltt_buffer_begin_callback(&shared_trace_info->channel.process,
5ffa9d14 524 ltt_get_timestamp(), 0);
525
a85b8f41 526 shared_trace_info->init = 1;
700d350d 527
528 /* Disable signals */
529 ret = sigfillset(&set);
530 if(ret) {
8a9103df 531 dbg_printf("LTT Error in sigfillset\n");
700d350d 532 }
533
700d350d 534 ret = pthread_sigmask(SIG_BLOCK, &set, &oldset);
535 if(ret) {
8a9103df 536 dbg_printf("LTT Error in pthread_sigmask\n");
700d350d 537 }
a85b8f41 538
700d350d 539 pid = fork();
540 if(pid > 0) {
541 /* Parent */
a85b8f41 542 shared_trace_info->daemon_id = pid;
543 thread_trace_info = shared_trace_info;
700d350d 544
545 /* Enable signals */
546 ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
547 if(ret) {
8a9103df 548 dbg_printf("LTT Error in pthread_sigmask\n");
700d350d 549 }
550 } else if(pid == 0) {
b5d612cb 551 pid_t sid;
700d350d 552 /* Child */
e8efa18d 553 role = LTT_ROLE_READER;
b5d612cb 554 sid = setsid();
b402c055 555 //Not a good idea to renice, unless futex wait eventually implement
556 //priority inheritence.
557 //ret = nice(1);
558 //if(ret < 0) {
559 // perror("Error in nice");
560 //}
b5d612cb 561 if(sid < 0) {
562 perror("Error setting sid");
563 }
77b31f39 564 ltt_usertrace_fast_daemon(shared_trace_info, oldset, l_traced_pid,
4c992ad5 565 l_traced_tid);
700d350d 566 /* Should never return */
567 exit(-1);
568 } else if(pid < 0) {
569 /* fork error */
a35eaa9c 570 perror("LTT Error in forking ltt-usertrace-fast");
700d350d 571 }
b09f3215 572}
573
e8efa18d 574static __thread struct _pthread_cleanup_buffer cleanup_buffer;
575
576void ltt_thread_init(void)
577{
578 _pthread_cleanup_push(&cleanup_buffer, ltt_usertrace_fast_cleanup, NULL);
579 ltt_rw_init();
580}
581
/* Constructor : runs the reader-writer initialization for the thread that
 * executes the constructors. */
void __attribute__((constructor)) __ltt_usertrace_fast_init(void)
{
	dbg_printf("LTT usertrace-fast init\n");
	ltt_rw_init();
}
588
589void __attribute__((destructor)) __ltt_usertrace_fast_fini(void)
590{
e8efa18d 591 if(role == LTT_ROLE_WRITER) {
8a9103df 592 dbg_printf("LTT usertrace-fast fini\n");
e8efa18d 593 ltt_usertrace_fast_cleanup(NULL);
594 }
b09f3215 595}
596
This page took 0.058449 seconds and 4 git commands to generate.