/* LTTng user-space "fast" library
 *
 * This daemon is spawned by each traced thread (to share the mmap).
 *
 * Its job is to periodically dump this buffer to disk (when it receives a
 * SIGUSR1 from its parent).
 *
 * It uses the control information in the shared memory area (producer/consumer
 * count).
 *
 * When the parent thread dies (yes, those things may happen ;) ), this daemon
 * will flush the last buffer and write it to disk.
 *
 * Supplementary note for streaming : the daemon is responsible for
 * periodically flushing the buffer if it is streaming data.
 *
 *
 * Notes :
 * shm memory is typically limited to 4096 units (system wide limit SHMMNI in
 * /proc/sys/kernel/shmmni). As it requires computation time upon creation, we
 * do not use it : we use a shared mmap() instead, which is passed through
 * the fork().
 * MAP_SHARED mmap segment. Updated when msync or munmap are called.
 * MAP_ANONYMOUS.
 * Memory mapped by mmap() is preserved across fork(2), with the same
 * attributes.
 *
 * Eventually, there will be two modes :
 * * Slow thread spawn : a fork() is done for each new thread. If the process
 *   dies, the data is not lost.
 * * Fast thread spawn : a pthread_create() is done by the application for each
 *   new thread.
 *
 * We use a timer to periodically check whether the parent died. It is less
 * intrusive than a ptrace() on the parent, which would get every signal. The
 * side effect of this is that we won't be notified if the parent does an
 * exec(). In this case, we will just sit there until the parent exits.
 *
 *
 * Copyright 2006 Mathieu Desnoyers
 *
 */

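/* Minimal usage sketch (illustrative, not part of the library) : an
 * application just links against this library; the constructor
 * __ltt_usertrace_fast_init() (end of this file) then forks the dumper
 * daemon for the main thread. Threads created with pthread_create() call
 * ltt_thread_init() instead (see the sketch near its definition below).
 * Communication with the daemon is signal-based : SIGUSR1 = flush request,
 * SIGUSR2 = traced thread exited, SIGALRM = poll for parent death. */
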
#define inline inline __attribute__((always_inline))

#define _GNU_SOURCE
#define LTT_TRACE
#define LTT_TRACE_FAST
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <signal.h>
#include <syscall.h>
#include <features.h>
#include <pthread.h>
#include <malloc.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/param.h>
#include <sys/time.h>
#include <errno.h>

// <asm/atomic.h> is included, with a hack for powerpc, by ltt-usertrace.h
#include <asm/timex.h> // for get_cycles()

_syscall0(pid_t,gettid)

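/* Note : _syscall0() is the legacy kernel-header macro generating a gettid()
 * wrapper, since the glibc of this era exports none. On toolchains without
 * _syscall0, an equivalent replacement (an assumption, not what this file
 * uses) would be : */
#if 0
#include <sys/syscall.h>
static pid_t gettid(void) { return syscall(SYS_gettid); }
#endif
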
#include <ltt/ltt-usertrace.h>

#ifdef LTT_SHOW_DEBUG
#define dbg_printf(...) printf(__VA_ARGS__)
#else
#define dbg_printf(...)
#endif //LTT_SHOW_DEBUG


enum force_switch_mode { FORCE_ACTIVE, FORCE_FLUSH };

/* Writer (the traced application) */

__thread struct ltt_trace_info *thread_trace_info = NULL;

void ltt_usertrace_fast_buffer_switch(void)
{
	struct ltt_trace_info *tmp = thread_trace_info;
	if(tmp)
		kill(tmp->daemon_id, SIGUSR1);
}

/* The cleanup should never be called from a signal handler */
static void ltt_usertrace_fast_cleanup(void *arg)
{
	struct ltt_trace_info *tmp = thread_trace_info;
	if(tmp) {
		thread_trace_info = NULL;
		kill(tmp->daemon_id, SIGUSR2);
		munmap(tmp, sizeof(*tmp));
	}
}

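/* Note : ltt_usertrace_fast_buffer_switch() is presumably called from the
 * writer fast path in ltt-usertrace-fast.h when a sub-buffer fills up; the
 * SIGUSR1 wakes the daemon sleeping in sigsuspend() so it can dump the full
 * sub-buffer to disk. */
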
/* Reader (the disk dumper daemon) */

static pid_t traced_pid = 0;
static pid_t traced_tid = 0;
static int parent_exited = 0;
static int fd_process = -1;
static char outfile_name[PATH_MAX];
static char identifier_name[PATH_MAX];

/* signal handling */
static void handler_sigusr1(int signo)
{
	dbg_printf("LTT Signal %d received : parent buffer switch.\n", signo);
}

static void handler_sigusr2(int signo)
{
	dbg_printf("LTT Signal %d received : parent exited.\n", signo);
	parent_exited = 1;
}

static void handler_sigalarm(int signo)
{
	dbg_printf("LTT Signal %d received\n", signo);

	if(getppid() != traced_pid) {
		/* Parent died */
		dbg_printf("LTT Parent %lu died, cleaning up\n",
			(unsigned long)traced_pid);
		traced_pid = 0;
	}
	alarm(3);
}

/* Do a buffer switch. Don't switch if buffer is completely empty */
static void flush_buffer(struct ltt_buf *ltt_buf, enum force_switch_mode mode)
{
	uint64_t tsc;
	int offset_begin, offset_end, offset_old;
	int reserve_commit_diff;
	int consumed_old, consumed_new;
	int commit_count, reserve_count;
	int end_switch_old;

	do {
		offset_old = atomic_read(&ltt_buf->offset);
		offset_begin = offset_old;
		end_switch_old = 0;
		tsc = ltt_get_timestamp();
		if(tsc == 0) {
			/* Error in getting the timestamp : should not happen : it would
			 * mean we are called from an NMI during a write seqlock on xtime. */
			return;
		}

		if(SUBBUF_OFFSET(offset_begin, ltt_buf) != 0) {
			offset_begin = SUBBUF_ALIGN(offset_begin, ltt_buf);
			end_switch_old = 1;
		} else {
			/* we do not have to switch : buffer is empty */
			return;
		}
		if(mode == FORCE_ACTIVE)
			offset_begin += ltt_subbuf_header_len(ltt_buf);
		/* Always begin_switch in FORCE_ACTIVE mode */

		/* Test new buffer integrity */
		reserve_commit_diff =
			atomic_read(
				&ltt_buf->reserve_count[SUBBUF_INDEX(offset_begin, ltt_buf)])
			- atomic_read(
				&ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
		if(reserve_commit_diff == 0) {
			/* Next buffer not corrupted. */
			if(mode == FORCE_ACTIVE
					&& (offset_begin-atomic_read(&ltt_buf->consumed))
						>= ltt_buf->alloc_size) {
				/* We do not overwrite non-consumed buffers and we are full :
				   ignore the switch while tracing is active. */
				return;
			}
		} else {
			/* Next subbuffer corrupted. Force pushing reader even in normal mode */
		}

		offset_end = offset_begin;
	} while(atomic_cmpxchg(&ltt_buf->offset, offset_old, offset_end)
			!= offset_old);


	if(mode == FORCE_ACTIVE) {
		/* Push the reader if necessary */
		do {
			consumed_old = atomic_read(&ltt_buf->consumed);
			/* If the buffer is in overwrite mode, push the reader consumed count
			   if the write position has reached it and we are not at the first
			   iteration (don't push the reader farther than the writer).
			   This operation can be done concurrently by many writers in the
			   same buffer ; the writer at the farthest write position sub-buffer
			   index in the buffer is the one which will win this loop. */
			/* If the buffer is not in overwrite mode, pushing the reader only
			   happens if a sub-buffer is corrupted */
			if((SUBBUF_TRUNC(offset_end-1, ltt_buf)
					- SUBBUF_TRUNC(consumed_old, ltt_buf))
					>= ltt_buf->alloc_size)
				consumed_new = SUBBUF_ALIGN(consumed_old, ltt_buf);
			else {
				consumed_new = consumed_old;
				break;
			}
		} while(atomic_cmpxchg(&ltt_buf->consumed, consumed_old, consumed_new)
				!= consumed_old);

		if(consumed_old != consumed_new) {
			/* Reader pushed : we are the winner of the push, we can therefore
			   re-equilibrate reserve and commit. Atomic increment of the commit
			   count permits other writers to play around with this variable
			   before us. We keep track of corrupted_subbuffers even in overwrite
			   mode :
			   we never want to write over a not completely committed sub-buffer.
			   Possible causes : the buffer size is too low compared to the
			   unordered data input, or there is a writer who died between the
			   reserve and the commit. */
			if(reserve_commit_diff) {
				/* We have to alter the sub-buffer commit count : a sub-buffer is
				   corrupted */
				atomic_add(reserve_commit_diff,
					&ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
				atomic_inc(&ltt_buf->corrupted_subbuffers);
			}
		}
	}

	/* Always switch */

	if(end_switch_old) {
		/* old subbuffer */
		/* Concurrency safe because we are the last and only thread to alter this
		   sub-buffer. As long as it is not delivered and read, no other thread
		   can alter the offset, alter the reserve_count or call the
		   client_buffer_end_callback on this sub-buffer.
		   The only remaining threads could be the ones with pending commits. They
		   will have to do the deliver themselves.
		   Not concurrency safe in overwrite mode : we detect corrupted subbuffers
		   with commit and reserve counts. We keep a corrupted sub-buffers count
		   and push the readers across these sub-buffers.
		   Not concurrency safe if a writer is stalled in a subbuffer and
		   another writer switches in, finding out it's corrupted. The result will
		   be that the old (uncommitted) subbuffer will be declared corrupted, and
		   that the new subbuffer will be declared corrupted too because of the
		   commit count adjustment.
		   Offset old should never be 0. */
		ltt_buffer_end_callback(ltt_buf, tsc, offset_old,
			SUBBUF_INDEX((offset_old), ltt_buf));
		/* Setting this reserve_count will allow the sub-buffer to be delivered by
		   the last committer. */
		reserve_count = atomic_add_return((SUBBUF_OFFSET((offset_old-1),
				ltt_buf) + 1),
				&ltt_buf->reserve_count[SUBBUF_INDEX((offset_old), ltt_buf)]);
		if(reserve_count == atomic_read(
				&ltt_buf->commit_count[SUBBUF_INDEX((offset_old), ltt_buf)])) {
			ltt_deliver_callback(ltt_buf, SUBBUF_INDEX((offset_old), ltt_buf),
				NULL);
		}
	}

	if(mode == FORCE_ACTIVE) {
		/* New sub-buffer */
		/* This code can be executed unordered : writers may already have written
		   to the sub-buffer before this code gets executed, caution. */
		/* The commit makes sure that this code is executed before the deliver
		   of this sub-buffer */
		ltt_buffer_begin_callback(ltt_buf, tsc,
			SUBBUF_INDEX(offset_begin, ltt_buf));
		commit_count = atomic_add_return(ltt_subbuf_header_len(ltt_buf),
			&ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
		/* Check if the written buffer has to be delivered */
		if(commit_count == atomic_read(
				&ltt_buf->reserve_count[SUBBUF_INDEX(offset_begin, ltt_buf)])) {
			ltt_deliver_callback(ltt_buf, SUBBUF_INDEX(offset_begin, ltt_buf),
				NULL);
		}
	}

}

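/* The SUBBUF_* helpers used above come from ltt-usertrace-fast.h. A sketch of
 * their assumed power-of-two arithmetic, for reference only (the real
 * definitions live in the header and may differ) : */
#if 0
#define SUBBUF_OFFSET(offset, buf)	((offset) & ((buf)->subbuf_size - 1))
#define SUBBUF_TRUNC(offset, buf)	((offset) & (~((buf)->subbuf_size - 1)))
#define SUBBUF_ALIGN(offset, buf)	\
	(((offset) + (buf)->subbuf_size) & (~((buf)->subbuf_size - 1)))
#define SUBBUF_INDEX(offset, buf)	\
	(((offset) / (buf)->subbuf_size) % LTT_N_SUBBUFS)
#endif
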

static int open_output_files(void)
{
	int ret;
	int fd;
	/* Open output files */
	umask(00000);
	ret = mkdir(LTT_USERTRACE_ROOT, 0777);
	if(ret < 0 && errno != EEXIST) {
		perror("LTT Error in creating output (mkdir)");
		exit(-1);
	}
	ret = chdir(LTT_USERTRACE_ROOT);
	if(ret < 0) {
		perror("LTT Error in creating output (chdir)");
		exit(-1);
	}
	snprintf(identifier_name, PATH_MAX-1, "%lu.%lu.%llu",
		(unsigned long)traced_tid, (unsigned long)traced_pid,
		(unsigned long long)get_cycles());
	snprintf(outfile_name, PATH_MAX-1, "process-%s", identifier_name);

#ifndef LTT_NULL_OUTPUT_TEST
	fd = creat(outfile_name, 0644);
#else
	/* NULL test */
	ret = symlink("/dev/null", outfile_name);
	if(ret < 0) {
		perror("error in symlink");
		exit(-1);
	}
	fd = open(outfile_name, O_WRONLY);
	if(fd < 0) {
		perror("Error in open");
		exit(-1);
	}
#endif //LTT_NULL_OUTPUT_TEST
	return fd;
}

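/* With the snprintf formats above, each traced thread gets its own output
 * file (layout inferred from the code) :
 *   LTT_USERTRACE_ROOT/process-<tid>.<pid>.<cycles>
 */
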
static inline int ltt_buffer_get(struct ltt_buf *ltt_buf,
		unsigned int *offset)
{
	unsigned int consumed_old, consumed_idx;
	consumed_old = atomic_read(&ltt_buf->consumed);
	consumed_idx = SUBBUF_INDEX(consumed_old, ltt_buf);

	/* The sub-buffer is only readable once fully committed */
	if(atomic_read(&ltt_buf->commit_count[consumed_idx])
			!= atomic_read(&ltt_buf->reserve_count[consumed_idx])) {
		return -EAGAIN;
	}
	/* No new sub-buffer produced since the last read */
	if((SUBBUF_TRUNC(atomic_read(&ltt_buf->offset), ltt_buf)
			- SUBBUF_TRUNC(consumed_old, ltt_buf)) == 0) {
		return -EAGAIN;
	}

	*offset = consumed_old;

	return 0;
}

static inline int ltt_buffer_put(struct ltt_buf *ltt_buf,
		unsigned int offset)
{
	unsigned int consumed_old, consumed_new;
	int ret;

	consumed_old = offset;
	consumed_new = SUBBUF_ALIGN(consumed_old, ltt_buf);
	if(atomic_cmpxchg(&ltt_buf->consumed, consumed_old, consumed_new)
			!= consumed_old) {
		/* We have been pushed by the writer : the last buffer read _is_
		 * corrupted!
		 * It can also happen if this is a buffer we never got. */
		return -EIO;
	} else {
		if(traced_pid == 0 || parent_exited) return 0;

		ret = sem_post(&ltt_buf->writer_sem);
		if(ret < 0) {
			perror("error in sem_post");
		}
		return 0;
	}
}

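/* Note : the writer_sem posted above is assumed to pair with a sem_wait() in
 * the writer's space-reservation path (in ltt-usertrace-fast.h), so that a
 * writer blocks instead of overwriting while all LTT_N_SUBBUFS sub-buffers
 * are still owned by the reader. */
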
static int read_subbuffer(struct ltt_buf *ltt_buf, int fd)
{
	unsigned int consumed_old;
	int err;
	dbg_printf("LTT read buffer\n");


	err = ltt_buffer_get(ltt_buf, &consumed_old);
	if(err != 0) {
		if(err != -EAGAIN) dbg_printf("LTT Reserving sub buffer failed\n");
		goto get_error;
	}
	if(fd_process == -1) {
		fd_process = fd = open_output_files();
	}

	err = TEMP_FAILURE_RETRY(write(fd,
			ltt_buf->start
				+ (consumed_old & ((ltt_buf->alloc_size)-1)),
			ltt_buf->subbuf_size));

	if(err < 0) {
		perror("Error in writing to file");
		goto write_error;
	}
#if 0
	err = fsync(pair->trace);
	if(err < 0) {
		ret = errno;
		perror("Error in writing to file");
		goto write_error;
	}
#endif //0
	/* Success falls through : the sub-buffer must be released in all cases */
write_error:
	err = ltt_buffer_put(ltt_buf, consumed_old);

	if(err != 0) {
		if(err == -EIO) {
			dbg_printf("Reader has been pushed by the writer, last subbuffer corrupted.\n");
			/* FIXME : we may delete the last written buffer if we wish. */
		}
		goto get_error;
	}

get_error:
	return err;
}

/* This function is called by ltt_rw_init, which has signals blocked */
static void ltt_usertrace_fast_daemon(struct ltt_trace_info *shared_trace_info,
		sigset_t oldset, pid_t l_traced_pid, pid_t l_traced_tid)
{
	struct sigaction act;
	int ret;

	traced_pid = l_traced_pid;
	traced_tid = l_traced_tid;

	dbg_printf("LTT ltt_usertrace_fast_daemon : init is %d, pid is %lu, traced_pid is %lu, traced_tid is %lu\n",
		shared_trace_info->init, (unsigned long)getpid(),
		(unsigned long)traced_pid, (unsigned long)traced_tid);

	act.sa_handler = handler_sigusr1;
	act.sa_flags = 0;
	sigemptyset(&(act.sa_mask));
	sigaddset(&(act.sa_mask), SIGUSR1);
	sigaction(SIGUSR1, &act, NULL);

	act.sa_handler = handler_sigusr2;
	act.sa_flags = 0;
	sigemptyset(&(act.sa_mask));
	sigaddset(&(act.sa_mask), SIGUSR2);
	sigaction(SIGUSR2, &act, NULL);

	act.sa_handler = handler_sigalarm;
	act.sa_flags = 0;
	sigemptyset(&(act.sa_mask));
	sigaddset(&(act.sa_mask), SIGALRM);
	sigaction(SIGALRM, &act, NULL);

	alarm(3);

	while(1) {
		ret = sigsuspend(&oldset);
		if(ret != -1) {
			perror("LTT Error in sigsuspend");
		}
		if(traced_pid == 0) break; /* parent died */
		if(parent_exited) break;
		dbg_printf("LTT Doing a buffer switch read. pid is : %lu\n",
			(unsigned long)getpid());

		do {
			ret = read_subbuffer(&shared_trace_info->channel.process, fd_process);
		} while(ret == 0);
	}
	/* The parent thread is dead and we have finished with the buffer */

	/* Buffer force switch (flush). Using FLUSH instead of ACTIVE because we know
	 * there is no writer. */
	flush_buffer(&shared_trace_info->channel.process, FORCE_FLUSH);
	do {
		ret = read_subbuffer(&shared_trace_info->channel.process, fd_process);
	} while(ret == 0);

	if(fd_process != -1)
		close(fd_process);

	ret = sem_destroy(&shared_trace_info->channel.process.writer_sem);
	if(ret < 0) {
		perror("error in sem_destroy");
	}
	munmap(shared_trace_info, sizeof(*shared_trace_info));

	exit(0);
}


/* Reader-writer initialization */

static enum ltt_process_role { LTT_ROLE_WRITER, LTT_ROLE_READER }
	role = LTT_ROLE_WRITER;


void ltt_rw_init(void)
{
	pid_t pid;
	struct ltt_trace_info *shared_trace_info;
	int ret;
	sigset_t set, oldset;
	pid_t l_traced_pid = getpid();
	pid_t l_traced_tid = gettid();

	/* parent : create the shared memory map */
	shared_trace_info = mmap(0, sizeof(*thread_trace_info),
		PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, 0, 0);
	if(shared_trace_info == MAP_FAILED) {
		perror("LTT Error in mmap");
		return;
	}
	shared_trace_info->init = 0;
	shared_trace_info->filter = 0;
	shared_trace_info->daemon_id = 0;
	shared_trace_info->nesting = 0;
	memset(&shared_trace_info->channel.process, 0,
		sizeof(shared_trace_info->channel.process));
	//Need NPTL! A process-shared (pshared) semaphore is not supported by
	//LinuxThreads.
	ret = sem_init(&shared_trace_info->channel.process.writer_sem, 1,
		LTT_N_SUBBUFS);
	if(ret < 0) {
		perror("error in sem_init");
	}
	shared_trace_info->channel.process.alloc_size = LTT_BUF_SIZE_PROCESS;
	shared_trace_info->channel.process.subbuf_size = LTT_SUBBUF_SIZE_PROCESS;
	shared_trace_info->channel.process.start =
		shared_trace_info->channel.process_buf;
	ltt_buffer_begin_callback(&shared_trace_info->channel.process,
		ltt_get_timestamp(), 0);

	shared_trace_info->init = 1;

	/* Disable signals */
	ret = sigfillset(&set);
	if(ret) {
		dbg_printf("LTT Error in sigfillset\n");
	}

	ret = pthread_sigmask(SIG_BLOCK, &set, &oldset);
	if(ret) {
		dbg_printf("LTT Error in pthread_sigmask\n");
	}

	pid = fork();
	if(pid > 0) {
		/* Parent */
		shared_trace_info->daemon_id = pid;
		thread_trace_info = shared_trace_info;

		/* Enable signals */
		ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
		if(ret) {
			dbg_printf("LTT Error in pthread_sigmask\n");
		}
	} else if(pid == 0) {
		pid_t sid;
		/* Child */
		role = LTT_ROLE_READER;
		sid = setsid();
		//Not a good idea to renice, unless futex wait eventually implements
		//priority inheritance.
		//ret = nice(1);
		//if(ret < 0) {
		//	perror("Error in nice");
		//}
		if(sid < 0) {
			perror("Error setting sid");
		}
		ltt_usertrace_fast_daemon(shared_trace_info, oldset, l_traced_pid,
			l_traced_tid);
		/* Should never return */
		exit(-1);
	} else if(pid < 0) {
		/* fork error */
		perror("LTT Error in forking ltt-usertrace-fast");
	}
}

static __thread struct _pthread_cleanup_buffer cleanup_buffer;

void ltt_thread_init(void)
{
	_pthread_cleanup_push(&cleanup_buffer, ltt_usertrace_fast_cleanup, NULL);
	ltt_rw_init();
}
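
/* Sketch of the expected per-thread setup in an application; the thread
 * function below is hypothetical : */
#if 0
static void *traced_thread(void *arg)
{
	ltt_thread_init();	/* push cleanup handler, fork this thread's daemon */
	/* ... instrumented work ... */
	return NULL;
}
/* in the application : pthread_create(&tid, NULL, traced_thread, NULL); */
#endif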

void __attribute__((constructor)) __ltt_usertrace_fast_init(void)
{
	dbg_printf("LTT usertrace-fast init\n");

	ltt_rw_init();
}

void __attribute__((destructor)) __ltt_usertrace_fast_fini(void)
{
	if(role == LTT_ROLE_WRITER) {
		dbg_printf("LTT usertrace-fast fini\n");
		ltt_usertrace_fast_cleanup(NULL);
	}
}