remove debug flag
[lttv.git] / usertrace-fast / ltt-usertrace-fast.c
CommitLineData
700d350d 1/* LTTng user-space "fast" library
2 *
3 * This daemon is spawned by each traced thread (to share the mmap).
4 *
5 * Its job is to dump periodically this buffer to disk (when it receives a
6 * SIGUSR1 from its parent).
7 *
8 * It uses the control information in the shared memory area (producer/consumer
9 * count).
10 *
11 * When the parent thread dies (yes, those things may happen) ;) , this daemon
12 * will flush the last buffer and write it to disk.
13 *
14 * Supplement note for streaming : the daemon is responsible for flushing
15 * periodically the buffer if it is streaming data.
16 *
b09f3215 17 *
700d350d 18 * Notes :
19 * shm memory is typically limited to 4096 units (system wide limit SHMMNI in
20 * /proc/sys/kernel/shmmni). As it requires computation time upon creation, we
21 * do not use it : we will use a shared mmap() instead which is passed through
22 * the fork().
23 * MAP_SHARED mmap segment. Updated when msync or munmap are called.
24 * MAP_ANONYMOUS.
25 * Memory mapped by mmap() is preserved across fork(2), with the same
26 * attributes.
27 *
28 * Eventually, there will be two modes :
29 * * Slow thread spawn : a fork() is done for each new thread. If the process
30 * dies, the data is not lost.
31 * * Fast thread spawn : a pthread_create() is done by the application for each
32 * new thread.
a85b8f41 33 *
34 * We use a timer to check periodically if the parent died. I think it is less
35 * intrusive than a ptrace() on the parent, which would get every signal. The
36 * side effect of this is that we won't be notified if the parent does an
37 * exec(). In this case, we will just sit there until the parent exits.
38 *
39 *
b09f3215 40 * Copyright 2006 Mathieu Desnoyers
41 *
42 */
43
8a9103df 44#define inline inline __attribute__((always_inline))
45
32f2b04a 46#define _GNU_SOURCE
8b30e7bc 47#define LTT_TRACE
b09f3215 48#include <sys/types.h>
49#include <sys/wait.h>
50#include <unistd.h>
51#include <stdlib.h>
52#include <stdio.h>
53#include <signal.h>
54#include <syscall.h>
55#include <features.h>
56#include <pthread.h>
57#include <malloc.h>
58#include <string.h>
700d350d 59#include <sys/mman.h>
60#include <signal.h>
77b31f39 61#include <sys/stat.h>
62#include <fcntl.h>
63#include <stdlib.h>
64#include <sys/param.h>
47d7d576 65#include <sys/time.h>
8b30e7bc 66#include <errno.h>
77b31f39 67
32f2b04a 68#include <asm/atomic.h>
77b31f39 69#include <asm/timex.h> //for get_cycles()
b09f3215 70
8b30e7bc 71_syscall0(pid_t,gettid)
72
b5d612cb 73#include <ltt/ltt-usertrace-fast.h>
b09f3215 74
/* Debug output helper.
 * With LTT_SHOW_DEBUG defined, dbg_printf() forwards its arguments to
 * printf(). Otherwise it expands to nothing, so its arguments are not
 * evaluated at all. */
#ifdef LTT_SHOW_DEBUG
#define dbg_printf(...) printf(__VA_ARGS__)
#else
#define dbg_printf(...)
#endif //LTT_SHOW_DEBUG
80
81
/* Switch mode requested from flush_buffer(). */
enum force_switch_mode {
	FORCE_ACTIVE,	/* close the old sub-buffer and begin a new one */
	FORCE_FLUSH	/* only close the old sub-buffer */
};
b09f3215 83
e8efa18d 84/* Writer (the traced application) */
b09f3215 85
e8efa18d 86__thread struct ltt_trace_info *thread_trace_info = NULL;
700d350d 87
e8efa18d 88void ltt_usertrace_fast_buffer_switch(void)
89{
a85b8f41 90 struct ltt_trace_info *tmp = thread_trace_info;
91 if(tmp)
92 kill(tmp->daemon_id, SIGUSR1);
e8efa18d 93}
700d350d 94
77b31f39 95/* The cleanup should never be called from a signal handler */
e8efa18d 96static void ltt_usertrace_fast_cleanup(void *arg)
b09f3215 97{
a85b8f41 98 struct ltt_trace_info *tmp = thread_trace_info;
99 if(tmp) {
100 thread_trace_info = NULL;
101 kill(tmp->daemon_id, SIGUSR2);
102 munmap(tmp, sizeof(*tmp));
103 }
700d350d 104}
b09f3215 105
e8efa18d 106/* Reader (the disk dumper daemon) */
700d350d 107
/* State of the dumper daemon.
 * traced_pid and parent_exited are written from signal handlers and polled
 * by the daemon main loop, hence the volatile qualification. */
static volatile pid_t traced_pid = 0;	/* reset to 0 by the SIGALRM handler
					   once the parent is gone */
static pid_t traced_tid = 0;
static volatile sig_atomic_t parent_exited = 0;	/* set by the SIGUSR2 handler */
700d350d 111
e8efa18d 112/* signal handling */
/* SIGUSR1 handler. It only emits a debug trace : receiving the signal is
 * what makes the pause() of the daemon loop return. */
static void handler_sigusr1(int signo)
{
	(void)signo;	/* only referenced when debug output is compiled in */

	dbg_printf("LTT Signal %d received : parent buffer switch.\n", signo);
}
117
118static void handler_sigusr2(int signo)
119{
8a9103df 120 dbg_printf("LTT Signal %d received : parent exited.\n", signo);
e8efa18d 121 parent_exited = 1;
122}
123
124static void handler_sigalarm(int signo)
125{
8a9103df 126 dbg_printf("LTT Signal %d received\n", signo);
e8efa18d 127
a85b8f41 128 if(getppid() != traced_pid) {
e8efa18d 129 /* Parent died */
8a9103df 130 dbg_printf("LTT Parent %lu died, cleaning up\n", traced_pid);
a85b8f41 131 traced_pid = 0;
e8efa18d 132 }
133 alarm(3);
b09f3215 134}
135
47d7d576 136/* Do a buffer switch. Don't switch if buffer is completely empty */
32f2b04a 137static void flush_buffer(struct ltt_buf *ltt_buf, enum force_switch_mode mode)
47d7d576 138{
32f2b04a 139 uint64_t tsc;
140 int offset_begin, offset_end, offset_old;
141 int reserve_commit_diff;
142 int consumed_old, consumed_new;
143 int commit_count, reserve_count;
144 int end_switch_old;
47d7d576 145
32f2b04a 146 do {
147 offset_old = atomic_read(&ltt_buf->offset);
148 offset_begin = offset_old;
149 end_switch_old = 0;
150 tsc = ltt_get_timestamp();
151 if(tsc == 0) {
152 /* Error in getting the timestamp : should not happen : it would
153 * mean we are called from an NMI during a write seqlock on xtime. */
154 return;
155 }
156
157 if(SUBBUF_OFFSET(offset_begin, ltt_buf) != 0) {
158 offset_begin = SUBBUF_ALIGN(offset_begin, ltt_buf);
159 end_switch_old = 1;
160 } else {
161 /* we do not have to switch : buffer is empty */
162 return;
163 }
164 if(mode == FORCE_ACTIVE)
165 offset_begin += ltt_subbuf_header_len(ltt_buf);
166 /* Always begin_switch in FORCE_ACTIVE mode */
167
168 /* Test new buffer integrity */
169 reserve_commit_diff =
170 atomic_read(
171 &ltt_buf->reserve_count[SUBBUF_INDEX(offset_begin, ltt_buf)])
172 - atomic_read(
173 &ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
174 if(reserve_commit_diff == 0) {
175 /* Next buffer not corrupted. */
176 if(mode == FORCE_ACTIVE
177 && (offset_begin-atomic_read(&ltt_buf->consumed))
178 >= ltt_buf->alloc_size) {
179 /* We do not overwrite non consumed buffers and we are full : ignore
180 switch while tracing is active. */
181 return;
182 }
183 } else {
184 /* Next subbuffer corrupted. Force pushing reader even in normal mode */
185 }
186
187 offset_end = offset_begin;
188 } while(atomic_cmpxchg(&ltt_buf->offset, offset_old, offset_end)
189 != offset_old);
190
191
192 if(mode == FORCE_ACTIVE) {
193 /* Push the reader if necessary */
194 do {
195 consumed_old = atomic_read(&ltt_buf->consumed);
196 /* If buffer is in overwrite mode, push the reader consumed count if
197 the write position has reached it and we are not at the first
198 iteration (don't push the reader farther than the writer).
199 This operation can be done concurrently by many writers in the
200 same buffer, the writer being at the fartest write position sub-buffer
201 index in the buffer being the one which will win this loop. */
202 /* If the buffer is not in overwrite mode, pushing the reader only
203 happen if a sub-buffer is corrupted */
204 if((SUBBUF_TRUNC(offset_end, ltt_buf)
205 - SUBBUF_TRUNC(consumed_old, ltt_buf))
206 >= ltt_buf->alloc_size)
207 consumed_new = SUBBUF_ALIGN(consumed_old, ltt_buf);
208 else {
209 consumed_new = consumed_old;
210 break;
211 }
212 } while(atomic_cmpxchg(&ltt_buf->consumed, consumed_old, consumed_new)
213 != consumed_old);
214
215 if(consumed_old != consumed_new) {
216 /* Reader pushed : we are the winner of the push, we can therefore
217 reequilibrate reserve and commit. Atomic increment of the commit
218 count permits other writers to play around with this variable
219 before us. We keep track of corrupted_subbuffers even in overwrite
220 mode :
221 we never want to write over a non completely committed sub-buffer :
222 possible causes : the buffer size is too low compared to the unordered
223 data input, or there is a writer who died between the reserve and the
224 commit. */
225 if(reserve_commit_diff) {
226 /* We have to alter the sub-buffer commit count : a sub-buffer is
227 corrupted */
228 atomic_add(reserve_commit_diff,
229 &ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
230 atomic_inc(&ltt_buf->corrupted_subbuffers);
231 }
232 }
233 }
234
235 /* Always switch */
236
237 if(end_switch_old) {
238 /* old subbuffer */
239 /* Concurrency safe because we are the last and only thread to alter this
240 sub-buffer. As long as it is not delivered and read, no other thread can
241 alter the offset, alter the reserve_count or call the
242 client_buffer_end_callback on this sub-buffer.
243 The only remaining threads could be the ones with pending commits. They
244 will have to do the deliver themself.
245 Not concurrency safe in overwrite mode. We detect corrupted subbuffers with
246 commit and reserve counts. We keep a corrupted sub-buffers count and push
247 the readers across these sub-buffers.
248 Not concurrency safe if a writer is stalled in a subbuffer and
249 another writer switches in, finding out it's corrupted. The result will be
250 than the old (uncommited) subbuffer will be declared corrupted, and that
251 the new subbuffer will be declared corrupted too because of the commit
252 count adjustment.
253 Offset old should never be 0. */
254 ltt_buffer_end_callback(ltt_buf, tsc, offset_old,
255 SUBBUF_INDEX((offset_old), ltt_buf));
256 /* Setting this reserve_count will allow the sub-buffer to be delivered by
257 the last committer. */
258 reserve_count = atomic_add_return((SUBBUF_OFFSET((offset_old-1),
259 ltt_buf) + 1),
260 &ltt_buf->reserve_count[SUBBUF_INDEX((offset_old),
261 ltt_buf)]);
262 if(reserve_count == atomic_read(
263 &ltt_buf->commit_count[SUBBUF_INDEX((offset_old), ltt_buf)])) {
264 ltt_deliver_callback(ltt_buf, SUBBUF_INDEX((offset_old), ltt_buf), NULL);
265 }
266 }
267
268 if(mode == FORCE_ACTIVE) {
269 /* New sub-buffer */
270 /* This code can be executed unordered : writers may already have written
271 to the sub-buffer before this code gets executed, caution. */
272 /* The commit makes sure that this code is executed before the deliver
273 of this sub-buffer */
274 ltt_buffer_begin_callback(ltt_buf, tsc, SUBBUF_INDEX(offset_begin, ltt_buf));
275 commit_count = atomic_add_return(ltt_subbuf_header_len(ltt_buf),
276 &ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
277 /* Check if the written buffer has to be delivered */
278 if(commit_count == atomic_read(
279 &ltt_buf->reserve_count[SUBBUF_INDEX(offset_begin, ltt_buf)])) {
280 ltt_deliver_callback(ltt_buf, SUBBUF_INDEX(offset_begin, ltt_buf), NULL);
281 }
282 }
47d7d576 283
284}
285
286static inline int ltt_buffer_get(struct ltt_buf *ltt_buf,
287 unsigned int *offset)
288{
289 unsigned int consumed_old, consumed_idx;
290 consumed_old = atomic_read(&ltt_buf->consumed);
291 consumed_idx = SUBBUF_INDEX(consumed_old, ltt_buf);
292
293 if(atomic_read(&ltt_buf->commit_count[consumed_idx])
294 != atomic_read(&ltt_buf->reserve_count[consumed_idx])) {
295 return -EAGAIN;
296 }
297 if((SUBBUF_TRUNC(atomic_read(&ltt_buf->offset), ltt_buf)
298 -SUBBUF_TRUNC(consumed_old, ltt_buf)) == 0) {
299 return -EAGAIN;
300 }
301
302 *offset = consumed_old;
303
304 return 0;
305}
306
307static inline int ltt_buffer_put(struct ltt_buf *ltt_buf,
308 unsigned int offset)
309{
310 unsigned int consumed_old, consumed_new;
311 int ret;
312
313 consumed_old = offset;
314 consumed_new = SUBBUF_ALIGN(consumed_old, ltt_buf);
315 if(atomic_cmpxchg(&ltt_buf->consumed, consumed_old, consumed_new)
316 != consumed_old) {
317 /* We have been pushed by the writer : the last buffer read _is_
318 * corrupted!
319 * It can also happen if this is a buffer we never got. */
320 return -EIO;
321 } else {
b402c055 322 if(atomic_inc_return(&ltt_buf->writer_futex) <= 0) {
323 atomic_set(&ltt_buf->writer_futex, 1);
47d7d576 324 /* tell the client that buffer is now unfull */
b402c055 325 ret = futex((unsigned long)&ltt_buf->writer_futex,
32f2b04a 326 FUTEX_WAKE, 1, 0, 0, 0);
47d7d576 327 if(ret != 1) {
8a9103df 328 dbg_printf("LTT warning : race condition : writer not waiting or too many writers\n");
47d7d576 329 }
47d7d576 330 }
331 }
332}
333
47d7d576 334static int read_subbuffer(struct ltt_buf *ltt_buf, int fd)
335{
32f2b04a 336 unsigned int consumed_old;
47d7d576 337 int err;
8a9103df 338 dbg_printf("LTT read buffer\n");
47d7d576 339
340
32f2b04a 341 err = ltt_buffer_get(ltt_buf, &consumed_old);
5ffa9d14 342 if(err != 0) {
8a9103df 343 if(err != -EAGAIN) dbg_printf("LTT Reserving sub buffer failed\n");
47d7d576 344 goto get_error;
345 }
346
347 err = TEMP_FAILURE_RETRY(write(fd,
348 ltt_buf->start
349 + (consumed_old & ((ltt_buf->alloc_size)-1)),
350 ltt_buf->subbuf_size));
351
352 if(err < 0) {
353 perror("Error in writing to file");
354 goto write_error;
355 }
356#if 0
357 err = fsync(pair->trace);
358 if(err < 0) {
359 ret = errno;
360 perror("Error in writing to file");
361 goto write_error;
362 }
363#endif //0
364write_error:
32f2b04a 365 err = ltt_buffer_put(ltt_buf, consumed_old);
47d7d576 366
367 if(err != 0) {
368 if(err == -EIO) {
8a9103df 369 dbg_printf("Reader has been pushed by the writer, last subbuffer corrupted.\n");
47d7d576 370 /* FIXME : we may delete the last written buffer if we wish. */
371 }
372 goto get_error;
373 }
374
375get_error:
376 return err;
377}
e8efa18d 378
a85b8f41 379/* This function is called by ltt_rw_init which has signals blocked */
700d350d 380static void ltt_usertrace_fast_daemon(struct ltt_trace_info *shared_trace_info,
4c992ad5 381 sigset_t oldset, pid_t l_traced_pid, pthread_t l_traced_tid)
700d350d 382{
383 struct sigaction act;
384 int ret;
b402c055 385 int fd_process;
77b31f39 386 char outfile_name[PATH_MAX];
387 char identifier_name[PATH_MAX];
388
700d350d 389
a85b8f41 390 traced_pid = l_traced_pid;
4c992ad5 391 traced_tid = l_traced_tid;
e8efa18d 392
8a9103df 393 dbg_printf("LTT ltt_usertrace_fast_daemon : init is %d, pid is %lu, traced_pid is %lu, traced_tid is %lu\n",
4c992ad5 394 shared_trace_info->init, getpid(), traced_pid, traced_tid);
700d350d 395
e8efa18d 396 act.sa_handler = handler_sigusr1;
700d350d 397 act.sa_flags = 0;
398 sigemptyset(&(act.sa_mask));
399 sigaddset(&(act.sa_mask), SIGUSR1);
400 sigaction(SIGUSR1, &act, NULL);
e8efa18d 401
402 act.sa_handler = handler_sigusr2;
403 act.sa_flags = 0;
404 sigemptyset(&(act.sa_mask));
405 sigaddset(&(act.sa_mask), SIGUSR2);
406 sigaction(SIGUSR2, &act, NULL);
407
408 act.sa_handler = handler_sigalarm;
409 act.sa_flags = 0;
410 sigemptyset(&(act.sa_mask));
411 sigaddset(&(act.sa_mask), SIGALRM);
412 sigaction(SIGALRM, &act, NULL);
413
700d350d 414 /* Enable signals */
415 ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
416 if(ret) {
8a9103df 417 dbg_printf("LTT Error in pthread_sigmask\n");
700d350d 418 }
419
e8efa18d 420 alarm(3);
421
77b31f39 422 /* Open output files */
423 umask(00000);
424 ret = mkdir(LTT_USERTRACE_ROOT, 0777);
425 if(ret < 0 && errno != EEXIST) {
426 perror("LTT Error in creating output (mkdir)");
427 exit(-1);
428 }
429 ret = chdir(LTT_USERTRACE_ROOT);
430 if(ret < 0) {
431 perror("LTT Error in creating output (chdir)");
432 exit(-1);
433 }
434 snprintf(identifier_name, PATH_MAX-1, "%lu.%lu.%llu",
46903a9a 435 traced_tid, traced_pid, get_cycles());
b402c055 436 snprintf(outfile_name, PATH_MAX-1, "process-%s", identifier_name);
437#ifndef LTT_NULL_OUTPUT_TEST
438 fd_process = creat(outfile_name, 0644);
439#else
440 /* NULL test */
441 ret = symlink("/dev/null", outfile_name);
442 if(ret < 0) {
443 perror("error in symlink");
444 }
445 fd_process = open(outfile_name, O_WRONLY);
446 if(fd_process < 0) {
447 perror("Error in open");
448 }
449#endif //LTT_NULL_OUTPUT_TEST
77b31f39 450
700d350d 451 while(1) {
452 pause();
a85b8f41 453 if(traced_pid == 0) break; /* parent died */
e8efa18d 454 if(parent_exited) break;
8a9103df 455 dbg_printf("LTT Doing a buffer switch read. pid is : %lu\n", getpid());
47d7d576 456
457 do {
b402c055 458 ret = read_subbuffer(&shared_trace_info->channel.process, fd_process);
47d7d576 459 } while(ret == 0);
700d350d 460 }
461
32f2b04a 462 /* The parent thread is dead and we have finished with the buffer */
463
464 /* Buffer force switch (flush). Using FLUSH instead of ACTIVE because we know
465 * there is no writer. */
b402c055 466 flush_buffer(&shared_trace_info->channel.process, FORCE_FLUSH);
47d7d576 467 do {
b402c055 468 ret = read_subbuffer(&shared_trace_info->channel.process, fd_process);
47d7d576 469 } while(ret == 0);
470
471
b402c055 472 close(fd_process);
77b31f39 473
e8efa18d 474 munmap(shared_trace_info, sizeof(*shared_trace_info));
475
476 exit(0);
700d350d 477}
b09f3215 478
e8efa18d 479
480/* Reader-writer initialization */
481
/* Role of the current process. Every process starts as a writer ; the
 * child forked by ltt_rw_init() switches to reader. */
enum ltt_process_role {
	LTT_ROLE_WRITER,
	LTT_ROLE_READER
};

static enum ltt_process_role role = LTT_ROLE_WRITER;
484
485
486void ltt_rw_init(void)
b09f3215 487{
700d350d 488 pid_t pid;
489 struct ltt_trace_info *shared_trace_info;
490 int ret;
491 sigset_t set, oldset;
a85b8f41 492 pid_t l_traced_pid = getpid();
4c992ad5 493 pid_t l_traced_tid = gettid();
700d350d 494
495 /* parent : create the shared memory map */
a85b8f41 496 shared_trace_info = mmap(0, sizeof(*thread_trace_info),
700d350d 497 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, 0, 0);
8a9103df 498 shared_trace_info->init=0;
499 shared_trace_info->filter=0;
500 shared_trace_info->daemon_id=0;
501 shared_trace_info->nesting=0;
b402c055 502 memset(&shared_trace_info->channel.process, 0,
503 sizeof(shared_trace_info->channel.process));
47d7d576 504 /* Tricky semaphore : is in a shared memory space, so it's ok for a fast
505 * mutex (futex). */
b402c055 506 atomic_set(&shared_trace_info->channel.process.writer_futex, LTT_N_SUBBUFS);
507 shared_trace_info->channel.process.alloc_size = LTT_BUF_SIZE_PROCESS;
508 shared_trace_info->channel.process.subbuf_size = LTT_SUBBUF_SIZE_PROCESS;
509 shared_trace_info->channel.process.start =
510 shared_trace_info->channel.process_buf;
511 ltt_buffer_begin_callback(&shared_trace_info->channel.process,
5ffa9d14 512 ltt_get_timestamp(), 0);
513
a85b8f41 514 shared_trace_info->init = 1;
700d350d 515
516 /* Disable signals */
517 ret = sigfillset(&set);
518 if(ret) {
8a9103df 519 dbg_printf("LTT Error in sigfillset\n");
700d350d 520 }
521
522
523 ret = pthread_sigmask(SIG_BLOCK, &set, &oldset);
524 if(ret) {
8a9103df 525 dbg_printf("LTT Error in pthread_sigmask\n");
700d350d 526 }
a85b8f41 527
700d350d 528 pid = fork();
529 if(pid > 0) {
530 /* Parent */
a85b8f41 531 shared_trace_info->daemon_id = pid;
532 thread_trace_info = shared_trace_info;
700d350d 533
534 /* Enable signals */
535 ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
536 if(ret) {
8a9103df 537 dbg_printf("LTT Error in pthread_sigmask\n");
700d350d 538 }
539 } else if(pid == 0) {
b5d612cb 540 pid_t sid;
700d350d 541 /* Child */
e8efa18d 542 role = LTT_ROLE_READER;
b5d612cb 543 sid = setsid();
b402c055 544 //Not a good idea to renice, unless futex wait eventually implement
545 //priority inheritence.
546 //ret = nice(1);
547 //if(ret < 0) {
548 // perror("Error in nice");
549 //}
b5d612cb 550 if(sid < 0) {
551 perror("Error setting sid");
552 }
77b31f39 553 ltt_usertrace_fast_daemon(shared_trace_info, oldset, l_traced_pid,
4c992ad5 554 l_traced_tid);
700d350d 555 /* Should never return */
556 exit(-1);
557 } else if(pid < 0) {
558 /* fork error */
a35eaa9c 559 perror("LTT Error in forking ltt-usertrace-fast");
700d350d 560 }
b09f3215 561}
562
e8efa18d 563static __thread struct _pthread_cleanup_buffer cleanup_buffer;
564
565void ltt_thread_init(void)
566{
567 _pthread_cleanup_push(&cleanup_buffer, ltt_usertrace_fast_cleanup, NULL);
568 ltt_rw_init();
569}
570
/* Constructor : runs ltt_rw_init() automatically at load time. */
void __attribute__((constructor)) __ltt_usertrace_fast_init(void)
{
	dbg_printf("LTT usertrace-fast init\n");
	ltt_rw_init();
}
577
578void __attribute__((destructor)) __ltt_usertrace_fast_fini(void)
579{
e8efa18d 580 if(role == LTT_ROLE_WRITER) {
8a9103df 581 dbg_printf("LTT usertrace-fast fini\n");
e8efa18d 582 ltt_usertrace_fast_cleanup(NULL);
583 }
b09f3215 584}
585
This page took 0.049736 seconds and 4 git commands to generate.