it runs
[lttv.git] / usertrace-fast / ltt-usertrace-fast.c
CommitLineData
700d350d 1/* LTTng user-space "fast" library
2 *
3 * This daemon is spawned by each traced thread (to share the mmap).
4 *
5 * Its job is to dump periodically this buffer to disk (when it receives a
6 * SIGUSR1 from its parent).
7 *
8 * It uses the control information in the shared memory area (producer/consumer
9 * count).
10 *
11 * When the parent thread dies (yes, those thing may happen) ;) , this daemon
12 * will flush the last buffer and write it to disk.
13 *
14 * Supplement note for streaming : the daemon is responsible for flushing
15 * periodically the buffer if it is streaming data.
16 *
b09f3215 17 *
700d350d 18 * Notes :
19 * shm memory is typically limited to 4096 units (system wide limit SHMMNI in
20 * /proc/sys/kernel/shmmni). As it requires computation time upon creation, we
21 * do not use it : we will use a shared mmap() instead which is passed through
22 * the fork().
23 * MAP_SHARED mmap segment. Updated when msync or munmap are called.
24 * MAP_ANONYMOUS.
25 * Memory mapped by mmap() is preserved across fork(2), with the same
26 * attributes.
27 *
 28 * Eventually, there will be two modes :
29 * * Slow thread spawn : a fork() is done for each new thread. If the process
30 * dies, the data is not lost.
31 * * Fast thread spawn : a pthread_create() is done by the application for each
32 * new thread.
a85b8f41 33 *
34 * We use a timer to check periodically if the parent died. I think it is less
35 * intrusive than a ptrace() on the parent, which would get every signal. The
36 * side effect of this is that we won't be notified if the parent does an
37 * exec(). In this case, we will just sit there until the parent exits.
38 *
39 *
b09f3215 40 * Copyright 2006 Mathieu Desnoyers
41 *
42 */
43
32f2b04a 44#define _GNU_SOURCE
8b30e7bc 45#define LTT_TRACE
b09f3215 46#include <sys/types.h>
47#include <sys/wait.h>
48#include <unistd.h>
49#include <stdlib.h>
50#include <stdio.h>
51#include <signal.h>
52#include <syscall.h>
53#include <features.h>
54#include <pthread.h>
55#include <malloc.h>
56#include <string.h>
700d350d 57#include <sys/mman.h>
58#include <signal.h>
77b31f39 59#include <sys/stat.h>
60#include <fcntl.h>
61#include <stdlib.h>
62#include <sys/param.h>
47d7d576 63#include <sys/time.h>
8b30e7bc 64#include <errno.h>
77b31f39 65
32f2b04a 66#include <asm/atomic.h>
77b31f39 67#include <asm/timex.h> //for get_cycles()
b09f3215 68
8b30e7bc 69_syscall0(pid_t,gettid)
70
b5d612cb 71#include <ltt/ltt-usertrace-fast.h>
b09f3215 72
32f2b04a 73enum force_switch_mode { FORCE_ACTIVE, FORCE_FLUSH };
b09f3215 74
e8efa18d 75/* Writer (the traced application) */
b09f3215 76
/* Per-thread pointer to the shared (writer/daemon) trace mapping; NULL until
 * ltt_rw_init() has run for this thread, reset to NULL on cleanup. */
__thread struct ltt_trace_info *thread_trace_info = NULL;
700d350d 78
e8efa18d 79void ltt_usertrace_fast_buffer_switch(void)
80{
a85b8f41 81 struct ltt_trace_info *tmp = thread_trace_info;
82 if(tmp)
83 kill(tmp->daemon_id, SIGUSR1);
e8efa18d 84}
700d350d 85
77b31f39 86/* The cleanup should never be called from a signal handler */
e8efa18d 87static void ltt_usertrace_fast_cleanup(void *arg)
b09f3215 88{
a85b8f41 89 struct ltt_trace_info *tmp = thread_trace_info;
90 if(tmp) {
91 thread_trace_info = NULL;
92 kill(tmp->daemon_id, SIGUSR2);
93 munmap(tmp, sizeof(*tmp));
94 }
700d350d 95}
b09f3215 96
e8efa18d 97/* Reader (the disk dumper daemon) */
700d350d 98
/* PID of the traced (parent) process; reset to 0 by the SIGALRM handler once
 * the parent is detected dead. */
static pid_t traced_pid = 0;
/* TID of the traced thread; used to build unique output file names. */
static pid_t traced_tid = 0;
/* Set by the SIGUSR2 handler when the parent announces its own exit. */
static int parent_exited = 0;
700d350d 102
/* signal handling */

/* SIGUSR1 : the traced parent requests a buffer switch. The handler only
 * logs; its real effect is to interrupt pause() in the daemon main loop,
 * which then drains the sub-buffers. */
static void handler_sigusr1(int signo)
{
	printf("LTT Signal %d received : parent buffer switch.\n", signo);
}
108
/* SIGUSR2 : the traced parent announces its exit (sent from
 * ltt_usertrace_fast_cleanup). Sets parent_exited so the daemon main loop
 * breaks out and performs the final flush. */
static void handler_sigusr2(int signo)
{
	printf("LTT Signal %d received : parent exited.\n", signo);
	parent_exited = 1;
}
114
115static void handler_sigalarm(int signo)
116{
a35eaa9c 117 printf("LTT Signal %d received\n", signo);
e8efa18d 118
a85b8f41 119 if(getppid() != traced_pid) {
e8efa18d 120 /* Parent died */
a85b8f41 121 printf("LTT Parent %lu died, cleaning up\n", traced_pid);
122 traced_pid = 0;
e8efa18d 123 }
124 alarm(3);
b09f3215 125}
126
/* Do a buffer switch. Don't switch if buffer is completely empty */
/*
 * Force a sub-buffer switch on @ltt_buf.
 *
 * @mode selects the caller's situation :
 *   FORCE_ACTIVE : writers may still be running; the switch is skipped when
 *                  the buffer is full of non-consumed data, and a new
 *                  sub-buffer header is accounted for.
 *   FORCE_FLUSH  : no writer remains (daemon shutdown); only the old
 *                  sub-buffer is closed.
 *
 * Lockless algorithm : the write offset is moved with a cmpxchg retry loop;
 * the exact order of the atomic reads and the CAS is what makes this safe
 * against concurrent writers, so do not reorder statements in here.
 */
static void flush_buffer(struct ltt_buf *ltt_buf, enum force_switch_mode mode)
{
	uint64_t tsc;
	int offset_begin, offset_end, offset_old;
	int reserve_commit_diff;
	int consumed_old, consumed_new;
	int commit_count, reserve_count;
	int end_switch_old;

	/* Reserve the switch : move the write offset to the next sub-buffer
	 * boundary, retrying if a concurrent writer moved it under us. */
	do {
		offset_old = atomic_read(&ltt_buf->offset);
		offset_begin = offset_old;
		end_switch_old = 0;
		tsc = ltt_get_timestamp();
		if(tsc == 0) {
			/* Error in getting the timestamp : should not happen : it would
			 * mean we are called from an NMI during a write seqlock on xtime. */
			return;
		}

		if(SUBBUF_OFFSET(offset_begin, ltt_buf) != 0) {
			offset_begin = SUBBUF_ALIGN(offset_begin, ltt_buf);
			end_switch_old = 1;
		} else {
			/* we do not have to switch : buffer is empty */
			return;
		}
		if(mode == FORCE_ACTIVE)
			offset_begin += ltt_subbuf_header_len(ltt_buf);
		/* Always begin_switch in FORCE_ACTIVE mode */

		/* Test new buffer integrity : a non-zero difference means the next
		 * sub-buffer was reserved but never fully committed (corrupted). */
		reserve_commit_diff =
			atomic_read(
				&ltt_buf->reserve_count[SUBBUF_INDEX(offset_begin, ltt_buf)])
			- atomic_read(
				&ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
		if(reserve_commit_diff == 0) {
			/* Next buffer not corrupted. */
			if(mode == FORCE_ACTIVE
					&& (offset_begin-atomic_read(&ltt_buf->consumed))
						>= ltt_buf->alloc_size) {
				/* We do not overwrite non consumed buffers and we are full : ignore
				   switch while tracing is active. */
				return;
			}
		} else {
			/* Next subbuffer corrupted. Force pushing reader even in normal mode */
		}

		offset_end = offset_begin;
	} while(atomic_cmpxchg(&ltt_buf->offset, offset_old, offset_end)
			!= offset_old);


	if(mode == FORCE_ACTIVE) {
		/* Push the reader if necessary */
		do {
			consumed_old = atomic_read(&ltt_buf->consumed);
			/* If buffer is in overwrite mode, push the reader consumed count if
			   the write position has reached it and we are not at the first
			   iteration (don't push the reader farther than the writer).
			   This operation can be done concurrently by many writers in the
			   same buffer, the writer being at the farthest write position
			   sub-buffer index in the buffer being the one which will win this
			   loop. */
			/* If the buffer is not in overwrite mode, pushing the reader only
			   happen if a sub-buffer is corrupted */
			if((SUBBUF_TRUNC(offset_end, ltt_buf)
					- SUBBUF_TRUNC(consumed_old, ltt_buf))
					>= ltt_buf->alloc_size)
				consumed_new = SUBBUF_ALIGN(consumed_old, ltt_buf);
			else {
				consumed_new = consumed_old;
				break;
			}
		} while(atomic_cmpxchg(&ltt_buf->consumed, consumed_old, consumed_new)
				!= consumed_old);

		if(consumed_old != consumed_new) {
			/* Reader pushed : we are the winner of the push, we can therefore
			   reequilibrate reserve and commit. Atomic increment of the commit
			   count permits other writers to play around with this variable
			   before us. We keep track of corrupted_subbuffers even in overwrite
			   mode :
			   we never want to write over a non completely committed sub-buffer :
			   possible causes : the buffer size is too low compared to the
			   unordered data input, or there is a writer who died between the
			   reserve and the commit. */
			if(reserve_commit_diff) {
				/* We have to alter the sub-buffer commit count : a sub-buffer is
				   corrupted */
				atomic_add(reserve_commit_diff,
					&ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
				atomic_inc(&ltt_buf->corrupted_subbuffers);
			}
		}
	}

	/* Always switch */

	if(end_switch_old) {
		/* old subbuffer */
		/* Concurrency safe because we are the last and only thread to alter this
		   sub-buffer. As long as it is not delivered and read, no other thread can
		   alter the offset, alter the reserve_count or call the
		   client_buffer_end_callback on this sub-buffer.
		   The only remaining threads could be the ones with pending commits. They
		   will have to do the deliver themself.
		   Not concurrency safe in overwrite mode. We detect corrupted subbuffers
		   with commit and reserve counts. We keep a corrupted sub-buffers count
		   and push the readers across these sub-buffers.
		   Not concurrency safe if a writer is stalled in a subbuffer and
		   another writer switches in, finding out it's corrupted. The result will
		   be that the old (uncommitted) subbuffer will be declared corrupted, and
		   that the new subbuffer will be declared corrupted too because of the
		   commit count adjustment.
		   Offset old should never be 0. */
		ltt_buffer_end_callback(ltt_buf, tsc, offset_old,
				SUBBUF_INDEX((offset_old), ltt_buf));
		/* Setting this reserve_count will allow the sub-buffer to be delivered by
		   the last committer. */
		reserve_count = atomic_add_return((SUBBUF_OFFSET((offset_old-1),
				ltt_buf) + 1),
				&ltt_buf->reserve_count[SUBBUF_INDEX((offset_old),
				ltt_buf)]);
		if(reserve_count == atomic_read(
				&ltt_buf->commit_count[SUBBUF_INDEX((offset_old), ltt_buf)])) {
			ltt_deliver_callback(ltt_buf, SUBBUF_INDEX((offset_old), ltt_buf), NULL);
		}
	}

	if(mode == FORCE_ACTIVE) {
		/* New sub-buffer */
		/* This code can be executed unordered : writers may already have written
		   to the sub-buffer before this code gets executed, caution. */
		/* The commit makes sure that this code is executed before the deliver
		   of this sub-buffer */
		ltt_buffer_begin_callback(ltt_buf, tsc, SUBBUF_INDEX(offset_begin, ltt_buf));
		commit_count = atomic_add_return(ltt_subbuf_header_len(ltt_buf),
				&ltt_buf->commit_count[SUBBUF_INDEX(offset_begin, ltt_buf)]);
		/* Check if the written buffer has to be delivered */
		if(commit_count == atomic_read(
				&ltt_buf->reserve_count[SUBBUF_INDEX(offset_begin, ltt_buf)])) {
			ltt_deliver_callback(ltt_buf, SUBBUF_INDEX(offset_begin, ltt_buf), NULL);
		}
	}

}
276
/*
 * Try to reserve the oldest unread sub-buffer of @ltt_buf for reading.
 *
 * On success, returns 0 and stores the consumed count (position of the
 * sub-buffer to read) into *offset. Returns -EAGAIN when the sub-buffer is
 * not fully committed yet, or when there is no unread data. The reads below
 * are ordered (commit/reserve counts checked before the write offset);
 * do not reorder them.
 */
static inline int ltt_buffer_get(struct ltt_buf *ltt_buf,
		unsigned int *offset)
{
	unsigned int consumed_old, consumed_idx;
	consumed_old = atomic_read(&ltt_buf->consumed);
	consumed_idx = SUBBUF_INDEX(consumed_old, ltt_buf);

	/* Sub-buffer still has pending commits : not readable yet. */
	if(atomic_read(&ltt_buf->commit_count[consumed_idx])
			!= atomic_read(&ltt_buf->reserve_count[consumed_idx])) {
		return -EAGAIN;
	}
	/* Writer has not moved past the consumed sub-buffer : nothing to read. */
	if((SUBBUF_TRUNC(atomic_read(&ltt_buf->offset), ltt_buf)
			-SUBBUF_TRUNC(consumed_old, ltt_buf)) == 0) {
		return -EAGAIN;
	}

	*offset = consumed_old;

	return 0;
}
297
298static inline int ltt_buffer_put(struct ltt_buf *ltt_buf,
299 unsigned int offset)
300{
301 unsigned int consumed_old, consumed_new;
302 int ret;
303
304 consumed_old = offset;
305 consumed_new = SUBBUF_ALIGN(consumed_old, ltt_buf);
306 if(atomic_cmpxchg(&ltt_buf->consumed, consumed_old, consumed_new)
307 != consumed_old) {
308 /* We have been pushed by the writer : the last buffer read _is_
309 * corrupted!
310 * It can also happen if this is a buffer we never got. */
311 return -EIO;
312 } else {
313 if(atomic_read(&ltt_buf->full) == 1) {
314 /* tell the client that buffer is now unfull */
32f2b04a 315 ret = futex((unsigned long)&ltt_buf->full,
316 FUTEX_WAKE, 1, 0, 0, 0);
47d7d576 317 if(ret != 1) {
318 printf("LTT warning : race condition : writer not waiting or too many writers\n");
319 }
320 atomic_set(&ltt_buf->full, 0);
321 }
322 }
323}
324
/*
 * Read one sub-buffer from @ltt_buf and append it to file descriptor @fd.
 *
 * Return values (the daemon loops while this returns 0) :
 *   0       : a sub-buffer was read and released, try again.
 *   -EAGAIN : no complete sub-buffer available.
 *   -EIO    : the writer pushed us; the sub-buffer just written is corrupted.
 *
 * NOTE(review): on a successful write(), err holds the positive byte count
 * and control deliberately falls through the write_error label into
 * ltt_buffer_put(), which then overwrites err with the release status.
 */
static int read_subbuffer(struct ltt_buf *ltt_buf, int fd)
{
	unsigned int consumed_old;
	int err;
	printf("LTT read buffer\n");


	/* Reserve the oldest fully committed sub-buffer for reading. */
	err = ltt_buffer_get(ltt_buf, &consumed_old);
	if(err != 0) {
		/* -EAGAIN just means "nothing to read yet" : stay quiet. */
		if(err != -EAGAIN) printf("LTT Reserving sub buffer failed\n");
		goto get_error;
	}

	/* Dump the whole sub-buffer. The mask keeps the start offset inside the
	 * allocation; assumes alloc_size is a power of two — TODO confirm in
	 * ltt-usertrace-fast.h. */
	err = TEMP_FAILURE_RETRY(write(fd,
			ltt_buf->start
				+ (consumed_old & ((ltt_buf->alloc_size)-1)),
			ltt_buf->subbuf_size));

	if(err < 0) {
		perror("Error in writing to file");
		goto write_error;
	}
#if 0
	err = fsync(pair->trace);
	if(err < 0) {
		ret = errno;
		perror("Error in writing to file");
		goto write_error;
	}
#endif //0
write_error:
	/* Release the sub-buffer : advance the consumed count. */
	err = ltt_buffer_put(ltt_buf, consumed_old);

	if(err != 0) {
		if(err == -EIO) {
			printf("Reader has been pushed by the writer, last subbuffer corrupted.\n");
			/* FIXME : we may delete the last written buffer if we wish. */
		}
		goto get_error;
	}

get_error:
	return err;
}
e8efa18d 369
a85b8f41 370/* This function is called by ltt_rw_init which has signals blocked */
700d350d 371static void ltt_usertrace_fast_daemon(struct ltt_trace_info *shared_trace_info,
4c992ad5 372 sigset_t oldset, pid_t l_traced_pid, pthread_t l_traced_tid)
700d350d 373{
374 struct sigaction act;
375 int ret;
77b31f39 376 int fd_fac;
377 int fd_cpu;
378 char outfile_name[PATH_MAX];
379 char identifier_name[PATH_MAX];
380
700d350d 381
a85b8f41 382 traced_pid = l_traced_pid;
4c992ad5 383 traced_tid = l_traced_tid;
e8efa18d 384
4c992ad5 385 printf("LTT ltt_usertrace_fast_daemon : init is %d, pid is %lu, traced_pid is %lu, traced_tid is %lu\n",
386 shared_trace_info->init, getpid(), traced_pid, traced_tid);
700d350d 387
e8efa18d 388 act.sa_handler = handler_sigusr1;
700d350d 389 act.sa_flags = 0;
390 sigemptyset(&(act.sa_mask));
391 sigaddset(&(act.sa_mask), SIGUSR1);
392 sigaction(SIGUSR1, &act, NULL);
e8efa18d 393
394 act.sa_handler = handler_sigusr2;
395 act.sa_flags = 0;
396 sigemptyset(&(act.sa_mask));
397 sigaddset(&(act.sa_mask), SIGUSR2);
398 sigaction(SIGUSR2, &act, NULL);
399
400 act.sa_handler = handler_sigalarm;
401 act.sa_flags = 0;
402 sigemptyset(&(act.sa_mask));
403 sigaddset(&(act.sa_mask), SIGALRM);
404 sigaction(SIGALRM, &act, NULL);
405
700d350d 406 /* Enable signals */
407 ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
408 if(ret) {
a35eaa9c 409 printf("LTT Error in pthread_sigmask\n");
700d350d 410 }
411
e8efa18d 412 alarm(3);
413
77b31f39 414 /* Open output files */
415 umask(00000);
416 ret = mkdir(LTT_USERTRACE_ROOT, 0777);
417 if(ret < 0 && errno != EEXIST) {
418 perror("LTT Error in creating output (mkdir)");
419 exit(-1);
420 }
421 ret = chdir(LTT_USERTRACE_ROOT);
422 if(ret < 0) {
423 perror("LTT Error in creating output (chdir)");
424 exit(-1);
425 }
426 snprintf(identifier_name, PATH_MAX-1, "%lu.%lu.%llu",
46903a9a 427 traced_tid, traced_pid, get_cycles());
77b31f39 428 snprintf(outfile_name, PATH_MAX-1, "facilities-%s", identifier_name);
5efa73ea 429 fd_fac = creat(outfile_name, 0644);
77b31f39 430
431 snprintf(outfile_name, PATH_MAX-1, "cpu-%s", identifier_name);
5efa73ea 432 fd_cpu = creat(outfile_name, 0644);
77b31f39 433
434
700d350d 435 while(1) {
436 pause();
a85b8f41 437 if(traced_pid == 0) break; /* parent died */
e8efa18d 438 if(parent_exited) break;
a35eaa9c 439 printf("LTT Doing a buffer switch read. pid is : %lu\n", getpid());
47d7d576 440
441 do {
32f2b04a 442 ret = read_subbuffer(&shared_trace_info->channel.cpu, fd_cpu);
47d7d576 443 } while(ret == 0);
444
445 do {
32f2b04a 446 ret = read_subbuffer(&shared_trace_info->channel.facilities, fd_fac);
47d7d576 447 } while(ret == 0);
700d350d 448 }
449
32f2b04a 450 /* The parent thread is dead and we have finished with the buffer */
451
452 /* Buffer force switch (flush). Using FLUSH instead of ACTIVE because we know
453 * there is no writer. */
454 flush_buffer(&shared_trace_info->channel.cpu, FORCE_FLUSH);
47d7d576 455 do {
32f2b04a 456 ret = read_subbuffer(&shared_trace_info->channel.cpu, fd_cpu);
47d7d576 457 } while(ret == 0);
458
459
32f2b04a 460 flush_buffer(&shared_trace_info->channel.facilities, FORCE_FLUSH);
47d7d576 461 do {
32f2b04a 462 ret = read_subbuffer(&shared_trace_info->channel.facilities, fd_fac);
47d7d576 463 } while(ret == 0);
464
77b31f39 465 close(fd_fac);
466 close(fd_cpu);
467
e8efa18d 468 munmap(shared_trace_info, sizeof(*shared_trace_info));
469
470 exit(0);
700d350d 471}
b09f3215 472
e8efa18d 473
474/* Reader-writer initialization */
475
/* Role of this process : the traced application (writer) or the forked
 * disk-dumper daemon (reader). Flipped to LTT_ROLE_READER in the child of
 * ltt_rw_init(); checked by the library destructor. */
static enum ltt_process_role { LTT_ROLE_WRITER, LTT_ROLE_READER }
	role = LTT_ROLE_WRITER;
478
479
480void ltt_rw_init(void)
b09f3215 481{
700d350d 482 pid_t pid;
483 struct ltt_trace_info *shared_trace_info;
484 int ret;
485 sigset_t set, oldset;
a85b8f41 486 pid_t l_traced_pid = getpid();
4c992ad5 487 pid_t l_traced_tid = gettid();
700d350d 488
489 /* parent : create the shared memory map */
a85b8f41 490 shared_trace_info = mmap(0, sizeof(*thread_trace_info),
700d350d 491 PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, 0, 0);
a85b8f41 492 memset(shared_trace_info, 0, sizeof(*shared_trace_info));
47d7d576 493 /* Tricky semaphore : is in a shared memory space, so it's ok for a fast
494 * mutex (futex). */
495 atomic_set(&shared_trace_info->channel.facilities.full, 0);
496 shared_trace_info->channel.facilities.alloc_size = LTT_BUF_SIZE_FACILITIES;
497 shared_trace_info->channel.facilities.subbuf_size = LTT_SUBBUF_SIZE_FACILITIES;
32f2b04a 498 shared_trace_info->channel.facilities.start =
499 shared_trace_info->channel.facilities_buf;
5ffa9d14 500 ltt_buffer_begin_callback(&shared_trace_info->channel.facilities,
501 ltt_get_timestamp(), 0);
32f2b04a 502
47d7d576 503 atomic_set(&shared_trace_info->channel.cpu.full, 0);
504 shared_trace_info->channel.cpu.alloc_size = LTT_BUF_SIZE_CPU;
505 shared_trace_info->channel.cpu.subbuf_size = LTT_SUBBUF_SIZE_CPU;
32f2b04a 506 shared_trace_info->channel.cpu.start = shared_trace_info->channel.cpu_buf;
5ffa9d14 507 ltt_buffer_begin_callback(&shared_trace_info->channel.cpu,
508 ltt_get_timestamp(), 0);
509
a85b8f41 510 shared_trace_info->init = 1;
700d350d 511
512 /* Disable signals */
513 ret = sigfillset(&set);
514 if(ret) {
a35eaa9c 515 printf("LTT Error in sigfillset\n");
700d350d 516 }
517
518
519 ret = pthread_sigmask(SIG_BLOCK, &set, &oldset);
520 if(ret) {
a35eaa9c 521 printf("LTT Error in pthread_sigmask\n");
700d350d 522 }
a85b8f41 523
700d350d 524 pid = fork();
525 if(pid > 0) {
526 /* Parent */
a85b8f41 527 shared_trace_info->daemon_id = pid;
528 thread_trace_info = shared_trace_info;
700d350d 529
530 /* Enable signals */
531 ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
532 if(ret) {
a35eaa9c 533 printf("LTT Error in pthread_sigmask\n");
700d350d 534 }
535 } else if(pid == 0) {
b5d612cb 536 pid_t sid;
700d350d 537 /* Child */
e8efa18d 538 role = LTT_ROLE_READER;
b5d612cb 539 sid = setsid();
540 if(sid < 0) {
541 perror("Error setting sid");
542 }
77b31f39 543 ltt_usertrace_fast_daemon(shared_trace_info, oldset, l_traced_pid,
4c992ad5 544 l_traced_tid);
700d350d 545 /* Should never return */
546 exit(-1);
547 } else if(pid < 0) {
548 /* fork error */
a35eaa9c 549 perror("LTT Error in forking ltt-usertrace-fast");
700d350d 550 }
b09f3215 551}
552
/* Per-thread pthread cleanup context used to register
 * ltt_usertrace_fast_cleanup for thread exit. */
static __thread struct _pthread_cleanup_buffer cleanup_buffer;

/* Initialize tracing for a newly created thread : register the cleanup
 * handler first (so resources are released even if the thread dies early),
 * then set up the buffers and fork this thread's daemon. */
void ltt_thread_init(void)
{
	_pthread_cleanup_push(&cleanup_buffer, ltt_usertrace_fast_cleanup, NULL);
	ltt_rw_init();
}
560
/* Library constructor : runs automatically at load time and sets up tracing
 * (and the daemon) for the process' initial thread. */
void __attribute__((constructor)) __ltt_usertrace_fast_init(void)
{
	printf("LTT usertrace-fast init\n");

	ltt_rw_init();
}
567
568void __attribute__((destructor)) __ltt_usertrace_fast_fini(void)
569{
e8efa18d 570 if(role == LTT_ROLE_WRITER) {
571 printf("LTT usertrace-fast fini\n");
572 ltt_usertrace_fast_cleanup(NULL);
573 }
b09f3215 574}
575
This page took 0.047729 seconds and 4 git commands to generate.